diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 18:58:50 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 18:58:50 -0800 |
commit | a0b1c42951dd06ec83cc1bc2c9788131d9fefcd8 (patch) | |
tree | a572f1523cf904c93020c9cdb32f3bc84ec3ac16 /net | |
parent | 8ec4942212a6d337982967778a3dc3b60aea782e (diff) | |
parent | ecd9883724b78cc72ed92c98bcb1a46c764fff21 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking update from David Miller:
1) Checkpoint/restarted TCP sockets now can properly propagate the TCP
timestamp offset. From Andrey Vagin.
2) VMWARE VM VSOCK layer, from Andy King.
3) Much improved support for virtual functions and SR-IOV in bnx2x,
from Ariel ELior.
4) All protocols on ipv4 and ipv6 are now network namespace aware, and
all the compatability checks for initial-namespace-only protocols is
removed. Thanks to Tom Parkin for helping deal with the last major
holdout, L2TP.
5) IPV6 support in netpoll and network namespace support in pktgen,
from Cong Wang.
6) Multiple Registration Protocol (MRP) and Multiple VLAN Registration
Protocol (MVRP) support, from David Ward.
7) Compute packet lengths more accurately in the packet scheduler, from
Eric Dumazet.
8) Use per-task page fragment allocator in skb_append_datato_frags(),
also from Eric Dumazet.
9) Add support for connection tracking labels in netfilter, from
Florian Westphal.
10) Fix default multicast group joining on ipv6, and add anti-spoofing
checks to 6to4 and 6rd. From Hannes Frederic Sowa.
11) Make ipv4/ipv6 fragmentation memory limits more reasonable in modern
times, rearrange inet frag datastructures for better cacheline
locality, and move more operations outside of locking. From Jesper
Dangaard Brouer.
12) Instead of strict master <--> slave relationships, allow arbitrary
scenerios with "upper device lists". From Jiri Pirko.
13) Improve rate limiting accuracy in TBF and act_police, also from Jiri
Pirko.
14) Add a BPF filter netfilter match target, from Willem de Bruijn.
15) Orphan and delete a bunch of pre-historic networking drivers from
Paul Gortmaker.
16) Add TSO support for GRE tunnels, from Pravin B SHelar. Although
this still needs some minor bug fixing before it's %100 correct in
all cases.
17) Handle unresolved IPSEC states like ARP, with a resolution packet
queue. From Steffen Klassert.
18) Remove TCP Appropriate Byte Count support (ABC), from Stephen
Hemminger. This was long overdue.
19) Support SO_REUSEPORT, from Tom Herbert.
20) Allow locking a socket BPF filter, so that it cannot change after a
process drops capabilities.
21) Add VLAN filtering to bridge, from Vlad Yasevich.
22) Bring ipv6 on-par with ipv4 and do not cache neighbour entries in
the ipv6 routes, from YOSHIFUJI Hideaki.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1538 commits)
ipv6: fix race condition regarding dst->expires and dst->from.
net: fix a wrong assignment in skb_split()
ip_gre: remove an extra dst_release()
ppp: set qdisc_tx_busylock to avoid LOCKDEP splat
atl1c: restore buffer state
net: fix a build failure when !CONFIG_PROC_FS
net: ipv4: fix waring -Wunused-variable
net: proc: fix build failed when procfs is not configured
Revert "xen: netback: remove redundant xenvif_put"
net: move procfs code to net/core/net-procfs.c
qmi_wwan, cdc-ether: add ADU960S
bonding: set sysfs device_type to 'bond'
bonding: fix bond_release_all inconsistencies
b44: use netdev_alloc_skb_ip_align()
xen: netback: remove redundant xenvif_put
net: fec: Do a sanity check on the gpio number
ip_gre: propogate target device GSO capability to the tunnel device
ip_gre: allow CSUM capable devices to handle packets
bonding: Fix initialize after use for 3ad machine state spinlock
bonding: Fix race condition between bond_enslave() and bond_3ad_update_lacp_rate()
...
Diffstat (limited to 'net')
397 files changed, 22992 insertions, 9406 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig index be33d27c8e6..80d4bf78905 100644 --- a/net/802/Kconfig +++ b/net/802/Kconfig @@ -5,3 +5,6 @@ config STP config GARP tristate select STP + +config MRP + tristate diff --git a/net/802/Makefile b/net/802/Makefile index a30d6e385ae..37e654d6615 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o +obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/mrp.c b/net/802/mrp.c new file mode 100644 index 00000000000..a4cc3229952 --- /dev/null +++ b/net/802/mrp.c @@ -0,0 +1,895 @@ +/* + * IEEE 802.1Q Multiple Registration Protocol (MRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/802/garp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <net/mrp.h> +#include <asm/unaligned.h> + +static unsigned int mrp_join_time __read_mostly = 200; +module_param(mrp_join_time, uint, 0644); +MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const u8 +mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { + [MRP_APPLICANT_VO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, + }, + [MRP_APPLICANT_VP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_AA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, + }, + [MRP_APPLICANT_VN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_AN, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, + }, + [MRP_APPLICANT_AN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, + }, + [MRP_APPLICANT_AA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_QA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_LA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, + }, + [MRP_APPLICANT_AO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_AO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, + }, + [MRP_APPLICANT_QO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, + }, + [MRP_APPLICANT_AP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, + [MRP_APPLICANT_QP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QP, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, +}; + +static const u8 +mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { + [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, + [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, +}; + +static void mrp_attrvalue_inc(void *value, u8 len) +{ + u8 *v = (u8 *)value; + + /* Add 1 to the last byte. If it becomes zero, + * go to the previous byte and repeat. + */ + while (len > 0 && !++v[--len]) + ; +} + +static int mrp_attr_cmp(const struct mrp_attr *attr, + const void *value, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->len != len) + return attr->len - len; + return memcmp(attr->value, value, len); +} + +static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + parent = parent->rb_left; + else if (d < 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = NULL, **p = &app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + p = &parent->rb_left; + else if (d < 0) + p = &parent->rb_right; + else { + /* The attribute already exists; re-use it. */ + return attr; + } + } + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = MRP_APPLICANT_VO; + attr->type = type; + attr->len = len; + memcpy(attr->value, value, len); + + rb_link_node(&attr->node, parent, p); + rb_insert_color(&attr->node, &app->mad); + return attr; +} + +static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) +{ + rb_erase(&attr->node, &app->mad); + kfree(attr); +} + +static int mrp_pdu_init(struct mrp_applicant *app) +{ + struct sk_buff *skb; + struct mrp_pdu_hdr *ph; + + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = app->app->pkttype.type; + skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph)); + ph->version = app->app->version; + + app->pdu = skb; + return 0; +} + +static int mrp_pdu_append_end_mark(struct mrp_applicant *app) +{ + __be16 *endmark; + + if (skb_tailroom(app->pdu) < sizeof(*endmark)) + return -1; + endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark)); + put_unaligned(MRP_END_MARK, endmark); + return 0; +} + +static void mrp_pdu_queue(struct mrp_applicant *app) +{ + if (!app->pdu) + return; + + if (mrp_cb(app->pdu)->mh) + mrp_pdu_append_end_mark(app); + mrp_pdu_append_end_mark(app); + + dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), + app->app->group_address, app->dev->dev_addr, + app->pdu->len); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void mrp_queue_xmit(struct mrp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, + u8 attrtype, u8 attrlen) +{ + struct mrp_msg_hdr *mh; + + if (mrp_cb(app->pdu)->mh) { + if (mrp_pdu_append_end_mark(app) < 0) + return -1; + mrp_cb(app->pdu)->mh = NULL; + mrp_cb(app->pdu)->vah = NULL; + } + + if (skb_tailroom(app->pdu) < sizeof(*mh)) + return -1; + mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh)); + mh->attrtype = attrtype; + mh->attrlen = attrlen; + mrp_cb(app->pdu)->mh = mh; + return 0; +} + +static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, + const void *firstattrvalue, u8 attrlen) +{ + struct mrp_vecattr_hdr *vah; + + if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) + return -1; + vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, + sizeof(*vah) + attrlen); + put_unaligned(0, &vah->lenflags); + memcpy(vah->firstattrvalue, firstattrvalue, attrlen); + mrp_cb(app->pdu)->vah = vah; + memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); + return 0; +} + +static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, + const struct mrp_attr *attr, + enum mrp_vecattr_event vaevent) +{ + u16 len, pos; + u8 *vaevents; + int err; +again: + if (!app->pdu) { + err = mrp_pdu_init(app); + if (err < 0) + return err; + } + + /* If there is no Message header in the PDU, or the Message header is + * for a different attribute type, add an EndMark (if necessary) and a + * new Message header to the PDU. + */ + if (!mrp_cb(app->pdu)->mh || + mrp_cb(app->pdu)->mh->attrtype != attr->type || + mrp_cb(app->pdu)->mh->attrlen != attr->len) { + if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) + goto queue; + } + + /* If there is no VectorAttribute header for this Message in the PDU, + * or this attribute's value does not sequentially follow the previous + * attribute's value, add a new VectorAttribute header to the PDU. + */ + if (!mrp_cb(app->pdu)->vah || + memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { + if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) + goto queue; + } + + len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); + pos = len % 3; + + /* Events are packed into Vectors in the PDU, three to a byte. Add a + * byte to the end of the Vector if necessary. + */ + if (!pos) { + if (skb_tailroom(app->pdu) < sizeof(u8)) + goto queue; + vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8)); + } else { + vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); + } + + switch (pos) { + case 0: + *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + break; + case 1: + *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; + break; + case 2: + *vaevents += vaevent; + break; + default: + WARN_ON(1); + } + + /* Increment the length of the VectorAttribute in the PDU, as well as + * the value of the next attribute that would continue its Vector. + */ + put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); + mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); + + return 0; + +queue: + mrp_pdu_queue(app); + goto again; +} + +static void mrp_attr_event(struct mrp_applicant *app, + struct mrp_attr *attr, enum mrp_event event) +{ + enum mrp_applicant_state state; + + state = mrp_applicant_state_table[attr->state][event]; + if (state == MRP_APPLICANT_INVALID) { + WARN_ON(1); + return; + } + + if (event == MRP_EVENT_TX) { + /* When appending the attribute fails, don't update its state + * in order to retry at the next TX event. + */ + + switch (mrp_tx_action_table[attr->state]) { + case MRP_TX_ACTION_NONE: + case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: + case MRP_TX_ACTION_S_IN_OPTIONAL: + break; + case MRP_TX_ACTION_S_NEW: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_NEW) < 0) + return; + break; + case MRP_TX_ACTION_S_JOIN_IN: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) + return; + break; + case MRP_TX_ACTION_S_LV: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_LV) < 0) + return; + /* As a pure applicant, sending a leave message + * implies that the attribute was unregistered and + * can be destroyed. + */ + mrp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + } + + attr->state = state; +} + +int mrp_request_join(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return -ENOMEM; + + spin_lock_bh(&app->lock); + attr = mrp_attr_create(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + mrp_attr_event(app, attr, MRP_EVENT_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_request_join); + +void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return; + + spin_lock_bh(&app->lock); + attr = mrp_attr_lookup(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + mrp_attr_event(app, attr, MRP_EVENT_LV); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(mrp_request_leave); + +static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_event(app, attr, event); + } +} + +static void mrp_join_timer_arm(struct mrp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void mrp_join_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + mrp_join_timer_arm(app); +} + +static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +{ + __be16 endmark; + + if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) + return -1; + if (endmark == MRP_END_MARK) { + *offset += sizeof(endmark); + return -1; + } + return 0; +} + +static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, + struct sk_buff *skb, + enum mrp_vecattr_event vaevent) +{ + struct mrp_attr *attr; + enum mrp_event event; + + attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen, + mrp_cb(skb)->mh->attrtype); + if (attr == NULL) + return; + + switch (vaevent) { + case MRP_VECATTR_EVENT_NEW: + event = MRP_EVENT_R_NEW; + break; + case MRP_VECATTR_EVENT_JOIN_IN: + event = MRP_EVENT_R_JOIN_IN; + break; + case MRP_VECATTR_EVENT_IN: + event = MRP_EVENT_R_IN; + break; + case MRP_VECATTR_EVENT_JOIN_MT: + event = MRP_EVENT_R_JOIN_MT; + break; + case MRP_VECATTR_EVENT_MT: + event = MRP_EVENT_R_MT; + break; + case MRP_VECATTR_EVENT_LV: + event = MRP_EVENT_R_LV; + break; + default: + return; + } + + mrp_attr_event(app, attr, event); +} + +static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, + struct sk_buff *skb, int *offset) +{ + struct mrp_vecattr_hdr _vah; + u16 valen; + u8 vaevents, vaevent; + + mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah), + &_vah); + if (!mrp_cb(skb)->vah) + return -1; + *offset += sizeof(_vah); + + if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_FLAG_LA) + mrp_mad_event(app, MRP_EVENT_R_LA); + valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_LEN_MASK); + + /* The VectorAttribute structure in a PDU carries event information + * about one or more attributes having consecutive values. Only the + * value for the first attribute is contained in the structure. So + * we make a copy of that value, and then increment it each time we + * advance to the next event in its Vector. + */ + if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > + FIELD_SIZEOF(struct sk_buff, cb)) + return -1; + if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen) < 0) + return -1; + *offset += mrp_cb(skb)->mh->attrlen; + + /* In a VectorAttribute, the Vector contains events which are packed + * three to a byte. We process one byte of the Vector at a time. + */ + while (valen > 0) { + if (skb_copy_bits(skb, *offset, &vaevents, + sizeof(vaevents)) < 0) + return -1; + *offset += sizeof(vaevents); + + /* Extract and process the first event. */ + vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + if (vaevent >= __MRP_VECATTR_EVENT_MAX) { + /* The byte is malformed; stop processing. */ + return -1; + } + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the second event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the third event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= __MRP_VECATTR_EVENT_MAX; + vaevent = vaevents; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + } + return 0; +} + +static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, + int *offset) +{ + struct mrp_msg_hdr _mh; + + mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); + if (!mrp_cb(skb)->mh) + return -1; + *offset += sizeof(_mh); + + if (mrp_cb(skb)->mh->attrtype == 0 || + mrp_cb(skb)->mh->attrtype > app->app->maxattr || + mrp_cb(skb)->mh->attrlen == 0) + return -1; + + while (skb->len > *offset) { + if (mrp_pdu_parse_end_mark(skb, offset) < 0) + break; + if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) + return -1; + } + return 0; +} + +static int mrp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct mrp_application *appl = container_of(pt, struct mrp_application, + pkttype); + struct mrp_port *port; + struct mrp_applicant *app; + struct mrp_pdu_hdr _ph; + const struct mrp_pdu_hdr *ph; + int offset = skb_network_offset(skb); + + /* If the interface is in promiscuous mode, drop the packet if + * it was unicast to another host. + */ + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) + goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto out; + port = rcu_dereference(dev->mrp_port); + if (unlikely(!port)) + goto out; + app = rcu_dereference(port->applicants[appl->type]); + if (unlikely(!app)) + goto out; + + ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); + if (!ph) + goto out; + offset += sizeof(_ph); + + if (ph->version != app->app->version) + goto out; + + spin_lock(&app->lock); + while (skb->len > offset) { + if (mrp_pdu_parse_end_mark(skb, &offset) < 0) + break; + if (mrp_pdu_parse_msg(app, skb, &offset) < 0) + break; + } + spin_unlock(&app->lock); +out: + kfree_skb(skb); + return 0; +} + +static int mrp_init_port(struct net_device *dev) +{ + struct mrp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->mrp_port, port); + return 0; +} + +static void mrp_release_port(struct net_device *dev) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + unsigned int i; + + for (i = 0; i <= MRP_APPLICATION_MAX; i++) { + if (rtnl_dereference(port->applicants[i])) + return; + } + RCU_INIT_POINTER(dev->mrp_port, NULL); + kfree_rcu(port, rcu); +} + +int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!rtnl_dereference(dev->mrp_port)) { + err = mrp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->group_address); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + mrp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + mrp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(mrp_init_applicant); + +void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + + ASSERT_RTNL(); + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. + */ + del_timer_sync(&app->join_timer); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + mrp_queue_xmit(app); + + dev_mc_del(dev, appl->group_address); + kfree_rcu(app, rcu); + mrp_release_port(dev); +} +EXPORT_SYMBOL_GPL(mrp_uninit_applicant); + +int mrp_register_application(struct mrp_application *appl) +{ + appl->pkttype.func = mrp_rcv; + dev_add_pack(&appl->pkttype); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_register_application); + +void mrp_unregister_application(struct mrp_application *appl) +{ + dev_remove_pack(&appl->pkttype); +} +EXPORT_SYMBOL_GPL(mrp_unregister_application); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963..8f7517df41a 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1..7bc8db08d7e 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a292e8050ef..a18714469bf 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -105,8 +107,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) */ unregister_netdevice_queue(dev, head); - if (grp->nr_vlan_devs == 0) + netdev_upper_dev_unlink(real_dev, dev); + + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -115,19 +121,12 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { const char *name = real_dev->name; - const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("Device %s has buggy VLAN hw accel\n", name); - return -EOPNOTSUPP; - } - if (vlan_find_dev(real_dev, vlan_id) != NULL) return -EEXIST; @@ -156,15 +155,22 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; + + err = netdev_upper_dev_link(real_dev, dev); + if (err) + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) - goto out_uninit_applicant; + goto out_upper_dev_unlink; /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); @@ -180,7 +186,12 @@ int register_vlan_dev(struct net_device *dev) return 0; -out_uninit_applicant: +out_upper_dev_unlink: + netdev_upper_dev_unlink(real_dev, dev); +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -654,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -681,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40..670f1e8cfc0 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 65e06abe023..f3b6f515eba 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -60,21 +60,25 @@ bool vlan_do_receive(struct sk_buff **skbp) return true; } -/* Must be invoked with rcu_read_lock or with RTNL. */ -struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, +/* Must be invoked with rcu_read_lock. */ +struct net_device *__vlan_find_dev_deep(struct net_device *dev, u16 vlan_id) { - struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); + struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { return vlan_group_get_device(&vlan_info->grp, vlan_id); } else { /* - * Bonding slaves do not have grp assigned to themselves. - * Grp is assigned to bonding master instead. + * Lower devices of master uppers (bonding, team) do not have + * grp assigned to themselves. Grp is assigned to upper device + * instead. */ - if (netif_is_bond_slave(real_dev)) - return __vlan_find_dev_deep(real_dev->master, vlan_id); + struct net_device *upper_dev; + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + return __vlan_find_dev_deep(upper_dev, vlan_id); } return NULL; @@ -140,6 +144,7 @@ err_free: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(vlan_untag); /* @@ -220,8 +225,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, if (!vid_info) return -ENOMEM; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_add_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_add_vid(dev, vid); if (err) { kfree(vid_info); @@ -278,8 +282,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, unsigned short vid = vid_info->vid; int err; - if ((dev->features & NETIF_F_HW_VLAN_FILTER) && - ops->ndo_vlan_rx_kill_vid) { + if (dev->features & NETIF_F_HW_VLAN_FILTER) { err = ops->ndo_vlan_rx_kill_vid(dev, vid); if (err) { pr_warn("failed to kill vid %d for device %s\n", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4a6d31a082b..19cf81bf9f6 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; @@ -640,9 +650,9 @@ static int vlan_ethtool_get_settings(struct net_device *dev, static void vlan_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, vlan_fullname); - strcpy(info->version, vlan_version); - strcpy(info->fw_version, "N/A"); + strlcpy(info->driver, vlan_fullname, sizeof(info->driver)); + strlcpy(info->version, vlan_version, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); } static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -723,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_rcu(netpoll); + __netpoll_free_async(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 00000000000..d9ec1d5964a --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/mrp.h> +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea187..1789658b7cd 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 4de77ea5fa3..dc526ec965e 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -131,7 +131,7 @@ void vlan_proc_cleanup(struct net *net) remove_proc_entry(name_conf, vn->proc_vlan_dir); if (vn->proc_vlan_dir) - proc_net_remove(net, name_root); + remove_proc_entry(name_root, net->proc_net); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... diff --git a/net/Kconfig b/net/Kconfig index 30b48f52313..5a1888bb036 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -209,7 +209,6 @@ source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" -source "net/wanrouter/Kconfig" source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/mac802154/Kconfig" @@ -218,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean @@ -232,7 +232,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && USE_GENERIC_SMP_HELPERS default y config NETPRIO_CGROUP diff --git a/net/Makefile b/net/Makefile index 4f4ee083064..091e7b04f30 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ -obj-$(CONFIG_WAN_ROUTER) += wanrouter/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ obj-$(CONFIG_NETROM) += netrom/ @@ -70,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/atm/proc.c b/net/atm/proc.c index 0d020de8d23..b4e75340b16 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -460,7 +460,7 @@ static void atm_proc_dirs_remove(void) if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - proc_net_remove(&init_net, "atm"); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 779095ded68..69a06c47b64 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1992,9 +1992,10 @@ static int __init ax25_init(void) dev_add_pack(&ax25_packet_type); register_netdevice_notifier(&ax25_dev_notifier); - proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_create("ax25_route", S_IRUGO, init_net.proc_net, + &ax25_route_fops); + proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops); + proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops); out: return rc; } @@ -2008,9 +2009,9 @@ MODULE_ALIAS_NETPROTO(PF_AX25); static void __exit ax25_exit(void) { - proc_net_remove(&init_net, "ax25_route"); - proc_net_remove(&init_net, "ax25"); - proc_net_remove(&init_net, "ax25_calls"); + remove_proc_entry("ax25_route", init_net.proc_net); + remove_proc_entry("ax25", init_net.proc_net); + remove_proc_entry("ax25_calls", init_net.proc_net); unregister_netdevice_notifier(&ax25_dev_notifier); diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index a0ba3bff9b3..a4808c29ea3 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7d02ebd11a7..72fe1bbf772 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -123,7 +123,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) unsigned int msecs; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; - msecs += random32() % (2 * BATADV_JITTER); + msecs += prandom_u32() % (2 * BATADV_JITTER); return jiffies + msecs_to_jiffies(msecs); } @@ -131,7 +131,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) /* when do we schedule a ogm packet to be sent */ static unsigned long batadv_iv_ogm_fwd_send_time(void) { - return jiffies + msecs_to_jiffies(random32() % (BATADV_JITTER / 2)); + return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2)); } /* apply hop penalty for a normal link */ @@ -183,7 +183,6 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, batadv_ogm_packet->tt_num_changes)) { - /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -261,7 +260,6 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) */ if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - /* FIXME: what about aggregated packets ? */ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n", @@ -325,7 +323,6 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (time_before(send_time, forw_packet->send_time) && time_after_eq(aggregation_end_time, forw_packet->send_time) && (aggregated_bytes <= BATADV_MAX_AGGREGATION_BYTES)) { - /* check aggregation compatibility * -> direct link packets are broadcasted on * their interface only @@ -815,7 +812,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_neigh_node->neigh_list, list) { - if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) continue; @@ -949,7 +945,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { - is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, seqno); @@ -1033,7 +1028,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, is_single_hop_neigh = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, @@ -1223,7 +1218,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { - /* mark direct link on incoming interface */ batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, is_single_hop_neigh, diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 5453b17d8df..973982414d5 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cebaae7e148..a81b9322e38 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5aebe9327d6..30f46526cbb 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -34,13 +34,14 @@ static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; static void batadv_bla_periodic_work(struct work_struct *work); -static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw); +static void +batadv_bla_send_announce(struct batadv_priv *bat_priv, + struct batadv_bla_backbone_gw *backbone_gw); /* return the index of the claim */ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -57,7 +58,7 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) static inline uint32_t batadv_choose_backbone_gw(const void *data, uint32_t size) { - struct batadv_claim *claim = (struct batadv_claim *)data; + const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; uint32_t hash = 0; hash = batadv_hash_bytes(hash, &claim->addr, sizeof(claim->addr)); @@ -75,9 +76,9 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_backbone_gw, + const void *data1 = container_of(node, struct batadv_bla_backbone_gw, hash_entry); - const struct batadv_backbone_gw *gw1 = data1, *gw2 = data2; + const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2; if (!batadv_compare_eth(gw1->orig, gw2->orig)) return 0; @@ -92,9 +93,9 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, static int batadv_compare_claim(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_claim, + const void *data1 = container_of(node, struct batadv_bla_claim, hash_entry); - const struct batadv_claim *cl1 = data1, *cl2 = data2; + const struct batadv_bla_claim *cl1 = data1, *cl2 = data2; if (!batadv_compare_eth(cl1->addr, cl2->addr)) return 0; @@ -106,7 +107,8 @@ static int batadv_compare_claim(const struct hlist_node *node, } /* free a backbone gw */ -static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) +static void +batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) { if (atomic_dec_and_test(&backbone_gw->refcount)) kfree_rcu(backbone_gw, rcu); @@ -115,16 +117,16 @@ static void batadv_backbone_gw_free_ref(struct batadv_backbone_gw *backbone_gw) /* finally deinitialize the claim */ static void batadv_claim_free_rcu(struct rcu_head *rcu) { - struct batadv_claim *claim; + struct batadv_bla_claim *claim; - claim = container_of(rcu, struct batadv_claim, rcu); + claim = container_of(rcu, struct batadv_bla_claim, rcu); batadv_backbone_gw_free_ref(claim->backbone_gw); kfree(claim); } /* free a claim, call claim_free_rcu if its the last reference */ -static void batadv_claim_free_ref(struct batadv_claim *claim) +static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) call_rcu(&claim->rcu, batadv_claim_free_rcu); @@ -136,14 +138,15 @@ static void batadv_claim_free_ref(struct batadv_claim *claim) * looks for a claim in the hash, and returns it if found * or NULL otherwise. */ -static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, - struct batadv_claim *data) +static struct batadv_bla_claim +*batadv_claim_hash_find(struct batadv_priv *bat_priv, + struct batadv_bla_claim *data) { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; struct hlist_node *node; - struct batadv_claim *claim; - struct batadv_claim *claim_tmp = NULL; + struct batadv_bla_claim *claim; + struct batadv_bla_claim *claim_tmp = NULL; int index; if (!hash) @@ -176,15 +179,15 @@ static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv, * * Returns claim if found or NULL otherwise. */ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, uint8_t *addr, short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; - struct batadv_backbone_gw search_entry, *backbone_gw; - struct batadv_backbone_gw *backbone_gw_tmp = NULL; + struct batadv_bla_backbone_gw search_entry, *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL; int index; if (!hash) @@ -215,12 +218,12 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, /* delete all claims for a backbone */ static void -batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) +batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_hashtable *hash; struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; int i; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -235,7 +238,6 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw) spin_lock_bh(list_lock); hlist_for_each_entry_safe(claim, node, node_tmp, head, hash_entry) { - if (claim->backbone_gw != backbone_gw) continue; @@ -338,7 +340,6 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", ethhdr->h_source, ethhdr->h_dest, vid); break; - } if (vid != -1) @@ -366,11 +367,11 @@ out: * searches for the backbone gw or creates a new one if it could not * be found. */ -static struct batadv_backbone_gw * +static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, short vid, bool own_backbone) { - struct batadv_backbone_gw *entry; + struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; int hash_added; @@ -437,7 +438,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; backbone_gw = batadv_bla_get_backbone_gw(bat_priv, primary_if->net_dev->dev_addr, @@ -462,8 +463,8 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; - struct batadv_claim *claim; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_claim *claim; + struct batadv_bla_backbone_gw *backbone_gw; int i; batadv_dbg(BATADV_DBG_BLA, bat_priv, @@ -502,7 +503,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, * After the request, it will repeat all of his own claims and finally * send an announcement claim with which we can check again. */ -static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) +static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) { /* first, remove all old entries */ batadv_bla_del_backbone_claims(backbone_gw); @@ -528,7 +529,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw) * places. */ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { uint8_t mac[ETH_ALEN]; __be16 crc; @@ -539,7 +540,6 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid, BATADV_CLAIM_TYPE_ANNOUNCE); - } /** @@ -551,10 +551,10 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid, - struct batadv_backbone_gw *backbone_gw) + struct batadv_bla_backbone_gw *backbone_gw) { - struct batadv_claim *claim; - struct batadv_claim search_claim; + struct batadv_bla_claim *claim; + struct batadv_bla_claim search_claim; int hash_added; memcpy(search_claim.addr, mac, ETH_ALEN); @@ -598,7 +598,6 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); - } /* set (new) backbone gw */ atomic_inc(&backbone_gw->refcount); @@ -617,7 +616,7 @@ claim_free_ref: static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const short vid) { - struct batadv_claim search_claim, *claim; + struct batadv_bla_claim search_claim, *claim; memcpy(search_claim.addr, mac, ETH_ALEN); search_claim.vid = vid; @@ -643,7 +642,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) @@ -661,12 +660,12 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, crc = ntohs(*((__be16 *)(&an_addr[4]))); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %04x\n", + "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", vid, backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "handle_announce(): CRC FAILED for %pM/%d (my = %04x, sent = %04x)\n", + "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, backbone_gw->vid, backbone_gw->crc, crc); @@ -715,7 +714,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* unclaim in any case if it is our own */ if (primary_if && batadv_compare_eth(backbone_addr, @@ -744,7 +743,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, uint8_t *backbone_addr, uint8_t *claim_addr, short vid) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; /* register the gateway if not yet available, and add the claim. */ @@ -835,7 +834,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /* if our mesh friends mac is bigger, use it for ourselves. */ if (ntohs(bla_dst->group) > ntohs(bla_dst_own->group)) { batadv_dbg(BATADV_DBG_BLA, bat_priv, - "taking other backbones claim group: %04x\n", + "taking other backbones claim group: %#.4x\n", ntohs(bla_dst->group)); bla_dst_own->group = bla_dst->group; } @@ -958,7 +957,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, */ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_hashtable *hash; @@ -1013,7 +1012,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, int now) { - struct batadv_claim *claim; + struct batadv_bla_claim *claim; struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; @@ -1062,7 +1061,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface *oldif) { - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; @@ -1104,16 +1103,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } } - - -/* (re)start the timer */ -static void batadv_bla_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); - queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, - msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); -} - /* periodic work to do: * * purge structures when they are too old * * send announcements @@ -1125,7 +1114,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct batadv_priv_bla *priv_bla; struct hlist_node *node; struct hlist_head *head; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; struct batadv_hard_iface *primary_if; int i; @@ -1184,7 +1173,8 @@ out: if (primary_if) batadv_hardif_free_ref(primary_if); - batadv_bla_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); } /* The hash for claim and backbone hash receive the same key because they @@ -1242,7 +1232,10 @@ int batadv_bla_init(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n"); - batadv_bla_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, + msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); return 0; } @@ -1330,7 +1323,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; struct hlist_node *node; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; int i; if (!atomic_read(&bat_priv->bridge_loop_avoidance)) @@ -1371,7 +1364,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, { struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; short vid = -1; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) @@ -1442,7 +1435,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, bool is_bcast) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; @@ -1536,7 +1529,7 @@ out: int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) { struct ethhdr *ethhdr; - struct batadv_claim search_claim, *claim = NULL; + struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret = 0; @@ -1612,7 +1605,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.claim_hash; - struct batadv_claim *claim; + struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_node *node; struct hlist_head *head; @@ -1626,10 +1619,10 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Claims announced for the mesh %s (orig %pM, group id %04x)\n", + "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n", "Client", "VID", "Originator", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1638,7 +1631,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, node, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%04x)\n", + seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", claim->addr, claim->vid, claim->backbone_gw->orig, (is_own ? 'x' : ' '), @@ -1657,7 +1650,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; - struct batadv_backbone_gw *backbone_gw; + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; struct hlist_node *node; struct hlist_head *head; @@ -1672,10 +1665,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, - "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", net_dev->name, primary_addr, ntohs(bat_priv->bla.claim_dest.group)); - seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n", "Originator", "VID", "last seen", "CRC"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1693,7 +1686,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) continue; seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%04x)\n", + " * %pM on % 5d % 4i.%03is (%#.4x)\n", backbone_gw->orig, backbone_gw->vid, secs, msecs, backbone_gw->crc); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 196d9a0254b..dea2fbc5d98 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 6f58ddd53bf..6ae86516db4 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -40,13 +40,14 @@ static struct dentry *batadv_debugfs; static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; -static char *batadv_log_char_addr(struct batadv_debug_log *debug_log, +static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, size_t idx) { return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; } -static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) +static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, + char c) { char *char_addr; @@ -59,7 +60,7 @@ static void batadv_emit_log_char(struct batadv_debug_log *debug_log, char c) } __printf(2, 3) -static int batadv_fdebug_log(struct batadv_debug_log *debug_log, +static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, const char *fmt, ...) { va_list args; @@ -114,7 +115,7 @@ static int batadv_log_release(struct inode *inode, struct file *file) return 0; } -static int batadv_log_empty(struct batadv_debug_log *debug_log) +static int batadv_log_empty(struct batadv_priv_debug_log *debug_log) { return !(debug_log->log_start - debug_log->log_end); } @@ -123,7 +124,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; int error, i = 0; char *char_addr; char c; @@ -164,7 +165,6 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, buf++; i++; - } spin_unlock_bh(&debug_log->lock); @@ -178,7 +178,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf, static unsigned int batadv_log_poll(struct file *file, poll_table *wait) { struct batadv_priv *bat_priv = file->private_data; - struct batadv_debug_log *debug_log = bat_priv->debug_log; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; poll_wait(file, &debug_log->queue_wait, wait); @@ -230,7 +230,6 @@ static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) #else /* CONFIG_BATMAN_ADV_DEBUG */ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) { - bat_priv->debug_log = NULL; return 0; } @@ -397,10 +396,8 @@ err: void batadv_debugfs_destroy(void) { - if (batadv_debugfs) { - debugfs_remove_recursive(batadv_debugfs); - batadv_debugfs = NULL; - } + debugfs_remove_recursive(batadv_debugfs); + batadv_debugfs = NULL; } int batadv_debugfs_add_meshif(struct net_device *dev) diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 3319e1f21f5..f8c3849edff 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 553921511e4..761a59002e3 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d060c033e7d..125c8c6fcfa 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index dd07c7e3654..074107f2cfa 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index f0d129e323c..039902dca4a 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 9001208d175..84bb2b18d71 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 13697f6e711..509b2bf8c2f 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f1d37cd8181..368219e026a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -457,6 +457,24 @@ out: batadv_hardif_free_ref(primary_if); } +/** + * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif + * @work: work queue item + * + * Free the parts of the hard interface which can not be removed under + * rtnl lock (to prevent deadlock situations). + */ +static void batadv_hardif_remove_interface_finish(struct work_struct *work) +{ + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(work, struct batadv_hard_iface, + cleanup_work); + + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_free_ref(hard_iface); +} + static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -484,6 +502,9 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); + INIT_WORK(&hard_iface->cleanup_work, + batadv_hardif_remove_interface_finish); + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); @@ -518,8 +539,7 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_free_ref(hard_iface); + queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 3732366e744..308437d52e2 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 15a849c2d41..7198dafd3bf 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index e05333905af..1b4da72f209 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -89,7 +89,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash, * * Returns the new hash value. */ -static inline uint32_t batadv_hash_bytes(uint32_t hash, void *data, +static inline uint32_t batadv_hash_bytes(uint32_t hash, const void *data, uint32_t size) { const unsigned char *key = data; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 87ca8095b01..0ba6c899b2d 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 29443a1dbb5..1fcca37b622 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index f65a222b7b8..21fe6987733 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2f85577086a..ced08b936a9 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2012.5.0" +#define BATADV_SOURCE_VERSION "2013.1.0" #endif /* B.A.T.M.A.N. parameters */ @@ -41,9 +41,11 @@ * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE */ #define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */ -#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ +#define BATADV_TT_LOCAL_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ +#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ #define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) @@ -276,9 +278,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, size_t count) { - int cpu = get_cpu(); - per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count; - put_cpu(); + this_cpu_add(bat_priv->bat_counters[idx], count); } #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8c32cf1c2de..457ea445217 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -29,14 +29,10 @@ #include "soft-interface.h" #include "bridge_loop_avoidance.h" -static void batadv_purge_orig(struct work_struct *work); +/* hash class keys */ +static struct lock_class_key batadv_orig_hash_lock_class_key; -static void batadv_start_purge_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); - queue_delayed_work(batadv_event_workqueue, - &bat_priv->orig_work, msecs_to_jiffies(1000)); -} +static void batadv_purge_orig(struct work_struct *work); /* returns 1 if they are the same originator */ static int batadv_compare_orig(const struct hlist_node *node, const void *data2) @@ -57,7 +53,14 @@ int batadv_originator_init(struct batadv_priv *bat_priv) if (!bat_priv->orig_hash) goto err; - batadv_start_purge_timer(bat_priv); + batadv_hash_set_lock_class(bat_priv->orig_hash, + &batadv_orig_hash_lock_class_key); + + INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); + return 0; err: @@ -178,7 +181,6 @@ void batadv_originator_free(struct batadv_priv *bat_priv) spin_lock_bh(list_lock); hlist_for_each_entry_safe(orig_node, node, node_tmp, head, hash_entry) { - hlist_del_rcu(node); batadv_orig_node_free_ref(orig_node); } @@ -285,7 +287,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, /* for all neighbors towards this originator ... */ hlist_for_each_entry_safe(neigh_node, node, node_tmp, &orig_node->neigh_list, list) { - last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; @@ -293,7 +294,6 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, (if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) @@ -393,7 +393,9 @@ static void batadv_purge_orig(struct work_struct *work) delayed_work = container_of(work, struct delayed_work, work); bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); _batadv_purge_orig(bat_priv); - batadv_start_purge_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, + &bat_priv->orig_work, + msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } void batadv_purge_orig_ref(struct batadv_priv *bat_priv) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 9778e656dec..286bf743e76 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index cb6405bf755..ed0aa89bbf8 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index c8f61e395b7..ccab0bbdbb5 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index fda8c17df27..3f92ae248e8 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1aa1722d018..60ba03fc839 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -80,7 +80,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, /* route added */ } else if ((!curr_router) && (neigh_node)) { - batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -172,7 +171,6 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, */ hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { - if (tmp_neigh_node == neigh_node) continue; @@ -836,7 +834,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG && batadv_frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1103,7 +1100,6 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(unicast_packet->dest)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 9262279ea66..99eeafaba40 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 4425af9dad4..80ca65fc89a 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -155,8 +155,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->forw_bcast_list_lock); /* start timer for this packet */ - INIT_DELAYED_WORK(&forw_packet->delayed_work, - batadv_send_outstanding_bcast_packet); queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, send_time); } @@ -210,6 +208,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* how often did we send the bcast packet ? */ forw_packet->num_packets = 0; + INIT_DELAYED_WORK(&forw_packet->delayed_work, + batadv_send_outstanding_bcast_packet); + _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; @@ -330,7 +331,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, &bat_priv->forw_bcast_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ @@ -357,7 +357,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bat_list_lock); hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, &bat_priv->forw_bat_list, list) { - /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 0078dece1ab..38e662f619a 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6b548fde8e0..2711e870f55 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -124,7 +124,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } @@ -181,7 +180,8 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; /* Register the client MAC in the transtable */ - batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); + if (!is_multicast_ether_addr(ethhdr->h_source)) + batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... @@ -449,6 +449,30 @@ static void batadv_interface_setup(struct net_device *dev) memset(priv, 0, sizeof(*priv)); } +/** + * batadv_softif_destroy_finish - cleans up the remains of a softif + * @work: work queue item + * + * Free the parts of the soft interface which can not be removed under + * rtnl lock (to prevent deadlock situations). + */ +static void batadv_softif_destroy_finish(struct work_struct *work) +{ + struct batadv_priv *bat_priv; + struct net_device *soft_iface; + + bat_priv = container_of(work, struct batadv_priv, + cleanup_work); + soft_iface = bat_priv->soft_iface; + + batadv_debugfs_del_meshif(soft_iface); + batadv_sysfs_del_meshif(soft_iface); + + rtnl_lock(); + unregister_netdevice(soft_iface); + rtnl_unlock(); +} + struct net_device *batadv_softif_create(const char *name) { struct net_device *soft_iface; @@ -463,6 +487,8 @@ struct net_device *batadv_softif_create(const char *name) goto out; bat_priv = netdev_priv(soft_iface); + bat_priv->soft_iface = soft_iface; + INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -480,7 +506,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); +#ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bridge_loop_avoidance, 0); +#endif #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif @@ -491,7 +519,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->gw_bandwidth, 41); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); +#ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); +#endif atomic_set(&bat_priv->fragmentation, 1); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -547,10 +577,10 @@ out: void batadv_softif_destroy(struct net_device *soft_iface) { - batadv_debugfs_del_meshif(soft_iface); - batadv_sysfs_del_meshif(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + batadv_mesh_free(soft_iface); - unregister_netdevice(soft_iface); + queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); } int batadv_softif_is_valid(const struct net_device *net_dev) @@ -581,10 +611,10 @@ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "B.A.T.M.A.N. advanced"); - strcpy(info->version, BATADV_SOURCE_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "batman"); + strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); } static u32 batadv_get_msglevel(struct net_device *dev) diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 07a08fed28b..43182e5e603 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 84a55cb19b0..afbba319d73 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 3fd1412b062..479acf4c16f 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 22457a7952b..d44672f4a34 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -29,6 +29,10 @@ #include <linux/crc16.h> +/* hash class keys */ +static struct lock_class_key batadv_tt_local_hash_lock_class_key; +static struct lock_class_key batadv_tt_global_hash_lock_class_key; + static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); @@ -48,13 +52,6 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } -static void batadv_tt_start_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, - msecs_to_jiffies(5000)); -} - static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) { @@ -112,7 +109,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data) struct batadv_tt_global_entry, common); return tt_global_entry; - } static void @@ -235,6 +231,9 @@ static int batadv_tt_local_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.local_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.local_hash, + &batadv_tt_local_hash_lock_class_key); + return 0; } @@ -249,7 +248,6 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_orig, tt_global->common.addr); batadv_tt_global_entry_free_ref(tt_global); - } void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, @@ -305,7 +303,11 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, (uint8_t)atomic_read(&bat_priv->tt.vn)); memcpy(tt_local->common.addr, addr, ETH_ALEN); - tt_local->common.flags = BATADV_NO_FLAGS; + /* The local entry has to be marked as NEW to avoid to send it in + * a full table response going out before the next ttvn increment + * (consistency check) + */ + tt_local->common.flags = BATADV_TT_CLIENT_NEW; if (batadv_is_wifi_iface(ifindex)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; atomic_set(&tt_local->common.refcount, 2); @@ -316,12 +318,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (batadv_compare_eth(addr, soft_iface->dev_addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; - /* The local entry has to be marked as NEW to avoid to send it in - * a full table response going out before the next ttvn increment - * (consistency check) - */ - tt_local->common.flags |= BATADV_TT_CLIENT_NEW; - hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_orig, &tt_local->common, &tt_local->common.hash_entry); @@ -472,18 +468,27 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; + struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; struct hlist_node *node; struct hlist_head *head; uint32_t i; + int last_seen_secs; + int last_seen_msecs; + unsigned long last_seen_jiffies; + bool no_purge; + uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) goto out; seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); + "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn), + bat_priv->tt.local_crc); + seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags", + "Last seen"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -491,18 +496,29 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - seq_printf(seq, " * %pM [%c%c%c%c%c]\n", + tt_local = container_of(tt_common_entry, + struct batadv_tt_local_entry, + common); + last_seen_jiffies = jiffies - tt_local->last_seen; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_secs = last_seen_msecs / 1000; + last_seen_msecs = last_seen_msecs % 1000; + + no_purge = tt_common_entry->flags & np_flag; + + seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n", tt_common_entry->addr, (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), - (tt_common_entry->flags & - BATADV_TT_CLIENT_NOPURGE ? 'P' : '.'), + no_purge ? 'P' : '.', (tt_common_entry->flags & BATADV_TT_CLIENT_NEW ? 'N' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & - BATADV_TT_CLIENT_WIFI ? 'W' : '.')); + BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + no_purge ? 0 : last_seen_secs, + no_purge ? 0 : last_seen_msecs); } rcu_read_unlock(); } @@ -627,7 +643,6 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv) batadv_tt_local_purge_list(bat_priv, head); spin_unlock_bh(list_lock); } - } static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) @@ -676,6 +691,9 @@ static int batadv_tt_global_init(struct batadv_priv *bat_priv) if (!bat_priv->tt.global_hash) return -ENOMEM; + batadv_hash_set_lock_class(bat_priv->tt.global_hash, + &batadv_tt_global_hash_lock_class_key); + return 0; } @@ -967,10 +985,11 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, best_entry = batadv_transtable_best_orig(tt_global_entry); if (best_entry) { last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); - seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n", + seq_printf(seq, + " %c %pM (%3u) via %pM (%3u) (%#.4x) [%c%c%c]\n", '*', tt_global_entry->common.addr, best_entry->ttvn, best_entry->orig_node->orig, - last_ttvn, + last_ttvn, best_entry->orig_node->tt_crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); @@ -1012,8 +1031,9 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); + seq_printf(seq, " %-13s %s %-15s %s (%-6s) %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC", + "Flags"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1049,7 +1069,6 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); - } static void @@ -1825,7 +1844,6 @@ out: if (!ret) kfree_skb(skb); return ret; - } static bool @@ -2111,7 +2129,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - batadv_tt_start_timer(bat_priv); + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); return 1; } @@ -2261,7 +2281,8 @@ static void batadv_tt_purge(struct work_struct *work) batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); - batadv_tt_start_timer(bat_priv); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); } void batadv_tt_free(struct batadv_priv *bat_priv) @@ -2352,7 +2373,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } spin_unlock_bh(list_lock); } - } static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, @@ -2496,7 +2516,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv, orig_node->tt_crc != tt_crc) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, - "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %u last_crc: %u num_changes: %u)\n", + "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n", orig_node->orig, ttvn, orig_ttvn, tt_crc, orig_node->tt_crc, tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, @@ -2549,7 +2569,6 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, batadv_tt_local_entry_free_ref(tt_local_entry); out: return ret; - } bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 46d4451a59e..ab8e683b402 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ae9ac9aca8c..4cd87a0b5b8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -24,6 +24,9 @@ #include "bitarray.h" #include <linux/kernel.h> +/** + * Maximum overhead for the encapsulation for a payload packet + */ #define BATADV_HEADER_LEN \ (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ sizeof(struct batadv_bcast_packet))) @@ -51,6 +54,22 @@ struct batadv_hard_iface_bat_iv { atomic_t ogm_seqno; }; +/** + * struct batadv_hard_iface - network device known to batman-adv + * @list: list node for batadv_hardif_list + * @if_num: identificator of the interface + * @if_status: status of the interface for batman-adv + * @net_dev: pointer to the net_device + * @frag_seqno: last fragment sequence number sent by this interface + * @hardif_obj: kobject of the per interface sysfs "mesh" directory + * @refcount: number of contexts the object is used + * @batman_adv_ptype: packet type describing packets that should be processed by + * batman-adv for this interface + * @soft_iface: the batman-adv interface which uses this network interface + * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: BATMAN IV specific per hard interface data + * @cleanup_work: work queue callback item for hard interface deinit + */ struct batadv_hard_iface { struct list_head list; int16_t if_num; @@ -63,22 +82,52 @@ struct batadv_hard_iface { struct net_device *soft_iface; struct rcu_head rcu; struct batadv_hard_iface_bat_iv bat_iv; + struct work_struct cleanup_work; }; /** - * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh - * @primary_addr: hosts primary interface address - * @last_seen: when last packet from this node was received - * @bcast_seqno_reset: time when the broadcast seqno window was reset - * @batman_seqno_reset: time when the batman seqno window was reset - * @gw_flags: flags related to gateway class - * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet - * @last_bcast_seqno: last broadcast sequence number received by this host - * - * @candidates: how many candidates are available - * @selected: next bonding candidate + * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh + * @orig: originator ethernet address + * @primary_addr: hosts primary interface address + * @router: router that should be used to reach this originator + * @batadv_dat_addr_t: address of the orig node in the distributed hash + * @bcast_own: bitfield containing the number of our OGMs this orig_node + * rebroadcasted "back" to us (relative to last_real_seqno) + * @bcast_own_sum: counted result of bcast_own + * @last_seen: time when last packet from this node was received + * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @batman_seqno_reset: time when the batman seqno window was reset + * @gw_flags: flags related to gateway class + * @flags: for now only VIS_SERVER flag + * @last_ttvn: last seen translation table version number + * @tt_crc: CRC of the translation table + * @tt_buff: last tt changeset this node received from the orig node + * @tt_buff_len: length of the last tt changeset this node received from the + * orig node + * @tt_buff_lock: lock that protects tt_buff and tt_buff_len + * @tt_size: number of global TT entries announced by the orig node + * @tt_initialised: bool keeping track of whether or not this node have received + * any translation table information from the orig node yet + * @last_real_seqno: last and best known sequence number + * @last_ttl: ttl of last received packet + * @bcast_bits: bitfield containing the info which payload broadcast originated + * from this orig node this host already has seen (relative to + * last_bcast_seqno) + * @last_bcast_seqno: last broadcast sequence number received by this host + * @neigh_list: list of potential next hop neighbor towards this orig node + * @frag_list: fragmentation buffer list for fragment re-assembly + * @last_frag_packet: time when last fragmented packet from this node was + * received + * @neigh_list_lock: lock protecting neigh_list, router and bonding_list + * @hash_entry: hlist node for batadv_priv::orig_hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, + * neigh_node->real_bits & neigh_node->real_packet_count + * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno + * @bond_candidates: how many candidates are available + * @bond_list: list of bonding candidates + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -94,11 +143,11 @@ struct batadv_orig_node { unsigned long batman_seqno_reset; uint8_t gw_flags; uint8_t flags; - atomic_t last_ttvn; /* last seen translation table version number */ + atomic_t last_ttvn; uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff */ + spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ atomic_t tt_size; bool tt_initialised; uint32_t last_real_seqno; @@ -107,23 +156,31 @@ struct batadv_orig_node { uint32_t last_bcast_seqno; struct hlist_head neigh_list; struct list_head frag_list; - spinlock_t neigh_list_lock; /* protects neigh_list and router */ - atomic_t refcount; - struct rcu_head rcu; + unsigned long last_frag_packet; + /* neigh_list_lock protects: neigh_list, router & bonding_list */ + spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - unsigned long last_frag_packet; /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, - * neigh_node->real_bits, neigh_node->real_packet_count + * neigh_node->real_bits & neigh_node->real_packet_count */ spinlock_t ogm_cnt_lock; - /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */ + /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - spinlock_t tt_list_lock; /* protects tt_list */ atomic_t bond_candidates; struct list_head bond_list; + atomic_t refcount; + struct rcu_head rcu; }; +/** + * struct batadv_gw_node - structure for orig nodes announcing gw capabilities + * @list: list node for batadv_priv_gw::list + * @orig_node: pointer to corresponding orig node + * @deleted: this struct is scheduled for deletion + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_gw_node { struct hlist_node list; struct batadv_orig_node *orig_node; @@ -132,13 +189,28 @@ struct batadv_gw_node { struct rcu_head rcu; }; -/* batadv_neigh_node - * @last_seen: when last packet via this neighbor was received +/** + * struct batadv_neigh_node - structure for single hop neighbors + * @list: list node for batadv_orig_node::neigh_list + * @addr: mac address of neigh node + * @tq_recv: ring buffer of received TQ values from this neigh node + * @tq_index: ring buffer index + * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) + * @last_ttl: last received ttl from this neigh node + * @bonding_list: list node for batadv_orig_node::bond_list + * @last_seen: when last packet via this neighbor was received + * @real_bits: bitfield containing the number of OGMs received from this neigh + * node (relative to orig_node->last_real_seqno) + * @real_packet_count: counted result of real_bits + * @orig_node: pointer to corresponding orig_node + * @if_incoming: pointer to incoming hard interface + * @lq_update_lock: lock protecting tq_recv & tq_index + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_neigh_node { struct hlist_node list; uint8_t addr[ETH_ALEN]; - uint8_t real_packet_count; uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; @@ -146,13 +218,20 @@ struct batadv_neigh_node { struct list_head bonding_list; unsigned long last_seen; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - atomic_t refcount; - struct rcu_head rcu; + uint8_t real_packet_count; struct batadv_orig_node *orig_node; struct batadv_hard_iface *if_incoming; - spinlock_t lq_update_lock; /* protects: tq_recv, tq_index */ + spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ + atomic_t refcount; + struct rcu_head rcu; }; +/** + * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression + * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast + * @crc: crc32 checksum of broadcast payload + * @entrytime: time when the broadcast packet was received + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { uint8_t orig[ETH_ALEN]; @@ -161,6 +240,33 @@ struct batadv_bcast_duplist_entry { }; #endif +/** + * enum batadv_counters - indices for traffic counters + * @BATADV_CNT_TX: transmitted payload traffic packet counter + * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter + * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter + * @BATADV_CNT_RX: received payload traffic packet counter + * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter + * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter + * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter + * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter + * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter + * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter + * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter + * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter + * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter + * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter + * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter + * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter + * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter + * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter + * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter + * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter + * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet + * counter + * @BATADV_CNT_NUM: number of traffic counters + */ enum batadv_counters { BATADV_CNT_TX, BATADV_CNT_TX_BYTES, @@ -192,14 +298,23 @@ enum batadv_counters { /** * struct batadv_priv_tt - per mesh interface translation table data * @vn: translation table version number + * @ogm_append_cnt: counter of number of OGMs containing the local tt diff * @local_changes: changes registered in an originator interval - * @poss_change: Detect an ongoing roaming phase. If true, then this node - * received a roaming_adv and has to inspect every packet directed to it to - * check whether it still is the true destination or not. This flag will be - * reset to false as soon as the this node's ttvn is increased * @changes_list: tracks tt local changes within an originator interval - * @req_list: list of pending tt_requests + * @local_hash: local translation table hash table + * @global_hash: global translation table hash table + * @req_list: list of pending & unanswered tt_requests + * @roam_list: list of the last roaming events of each client limiting the + * number of roaming events to avoid route flapping + * @changes_list_lock: lock protecting changes_list + * @req_list_lock: lock protecting req_list + * @roam_list_lock: lock protecting roam_list + * @local_entry_num: number of entries in the local hash table * @local_crc: Checksum of the local table, recomputed before sending a new OGM + * @last_changeset: last tt changeset this host has generated + * @last_changeset_len: length of last tt changeset this host has generated + * @last_changeset_lock: lock protecting last_changeset & last_changeset_len + * @work: work queue callback item for translation table purging */ struct batadv_priv_tt { atomic_t vn; @@ -217,36 +332,83 @@ struct batadv_priv_tt { uint16_t local_crc; unsigned char *last_changeset; int16_t last_changeset_len; - spinlock_t last_changeset_lock; /* protects last_changeset */ + /* protects last_changeset & last_changeset_len */ + spinlock_t last_changeset_lock; struct delayed_work work; }; +/** + * struct batadv_priv_bla - per mesh interface bridge loope avoidance data + * @num_requests; number of bla requests in flight + * @claim_hash: hash table containing mesh nodes this host has claimed + * @backbone_hash: hash table containing all detected backbone gateways + * @bcast_duplist: recently received broadcast packets array (for broadcast + * duplicate suppression) + * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist + * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr + * @claim_dest: local claim data (e.g. claim group) + * @work: work queue callback item for cleanups & bla announcements + */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla { - atomic_t num_requests; /* number of bla requests in flight */ + atomic_t num_requests; struct batadv_hashtable *claim_hash; struct batadv_hashtable *backbone_hash; struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; int bcast_duplist_curr; - /* protects bcast_duplist and bcast_duplist_curr */ + /* protects bcast_duplist & bcast_duplist_curr */ spinlock_t bcast_duplist_lock; struct batadv_bla_claim_dst claim_dest; struct delayed_work work; }; #endif +/** + * struct batadv_debug_log - debug logging data + * @log_buff: buffer holding the logs (ring bufer) + * @log_start: index of next character to read + * @log_end: index of next character to write + * @lock: lock protecting log_buff, log_start & log_end + * @queue_wait: log reader's wait queue + */ +#ifdef CONFIG_BATMAN_ADV_DEBUG +struct batadv_priv_debug_log { + char log_buff[BATADV_LOG_BUF_LEN]; + unsigned long log_start; + unsigned long log_end; + spinlock_t lock; /* protects log_buff, log_start and log_end */ + wait_queue_head_t queue_wait; +}; +#endif + +/** + * struct batadv_priv_gw - per mesh interface gateway data + * @list: list of available gateway nodes + * @list_lock: lock protecting gw_list & curr_gw + * @curr_gw: pointer to currently selected gateway node + * @reselect: bool indicating a gateway re-selection is in progress + */ struct batadv_priv_gw { struct hlist_head list; - spinlock_t list_lock; /* protects gw_list and curr_gw */ + spinlock_t list_lock; /* protects gw_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ atomic_t reselect; }; +/** + * struct batadv_priv_vis - per mesh interface vis data + * @send_list: list of batadv_vis_info packets to sent + * @hash: hash table containing vis data from other nodes in the network + * @hash_lock: lock protecting the hash table + * @list_lock: lock protecting my_info::recv_list + * @work: work queue callback item for vis packet sending + * @my_info: holds this node's vis data sent on a regular basis + */ struct batadv_priv_vis { struct list_head send_list; struct batadv_hashtable *hash; spinlock_t hash_lock; /* protects hash */ - spinlock_t list_lock; /* protects info::recv_list */ + spinlock_t list_lock; /* protects my_info::recv_list */ struct delayed_work work; struct batadv_vis_info *my_info; }; @@ -265,30 +427,78 @@ struct batadv_priv_dat { }; #endif +/** + * struct batadv_priv - per mesh interface data + * @mesh_state: current status of the mesh (inactive/active/deactivating) + * @soft_iface: net device which holds this struct as private data + * @stats: structure holding the data for the ndo_get_stats() call + * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) + * @aggregated_ogms: bool indicating whether OGM aggregation is enabled + * @bonding: bool indicating whether traffic bonding is enabled + * @fragmentation: bool indicating whether traffic fragmentation is enabled + * @ap_isolation: bool indicating whether ap isolation is enabled + * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is + * enabled + * @distributed_arp_table: bool indicating whether distributed ARP table is + * enabled + * @vis_mode: vis operation: client or server (see batadv_vis_packettype) + * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) + * @gw_sel_class: gateway selection class (applies if gw_mode client) + * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server) + * @orig_interval: OGM broadcast interval in milliseconds + * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop + * @log_level: configured log level (see batadv_dbg_level) + * @bcast_seqno: last sent broadcast packet sequence number + * @bcast_queue_left: number of remaining buffered broadcast packet slots + * @batman_queue_left: number of remaining OGM packet slots + * @num_ifaces: number of interfaces assigned to this mesh interface + * @mesh_obj: kobject for sysfs mesh subdirectory + * @debug_dir: dentry for debugfs batman-adv subdirectory + * @forw_bat_list: list of aggregated OGMs that will be forwarded + * @forw_bcast_list: list of broadcast packets that will be rebroadcasted + * @orig_hash: hash table containing mesh participants (orig nodes) + * @forw_bat_list_lock: lock protecting forw_bat_list + * @forw_bcast_list_lock: lock protecting forw_bcast_list + * @orig_work: work queue callback item for orig node purging + * @cleanup_work: work queue callback item for soft interface deinit + * @primary_if: one of the hard interfaces assigned to this mesh interface + * becomes the primary interface + * @bat_algo_ops: routing algorithm used by this mesh interface + * @bla: bridge loope avoidance data + * @debug_log: holding debug logging relevant data + * @gw: gateway data + * @tt: translation table data + * @vis: vis data + * @dat: distributed arp table data + */ struct batadv_priv { atomic_t mesh_state; + struct net_device *soft_iface; struct net_device_stats stats; uint64_t __percpu *bat_counters; /* Per cpu counters */ - atomic_t aggregated_ogms; /* boolean */ - atomic_t bonding; /* boolean */ - atomic_t fragmentation; /* boolean */ - atomic_t ap_isolation; /* boolean */ - atomic_t bridge_loop_avoidance; /* boolean */ + atomic_t aggregated_ogms; + atomic_t bonding; + atomic_t fragmentation; + atomic_t ap_isolation; +#ifdef CONFIG_BATMAN_ADV_BLA + atomic_t bridge_loop_avoidance; +#endif #ifdef CONFIG_BATMAN_ADV_DAT - atomic_t distributed_arp_table; /* boolean */ + atomic_t distributed_arp_table; +#endif + atomic_t vis_mode; + atomic_t gw_mode; + atomic_t gw_sel_class; + atomic_t gw_bandwidth; + atomic_t orig_interval; + atomic_t hop_penalty; +#ifdef CONFIG_BATMAN_ADV_DEBUG + atomic_t log_level; #endif - atomic_t vis_mode; /* VIS_TYPE_* */ - atomic_t gw_mode; /* GW_MODE_* */ - atomic_t gw_sel_class; /* uint */ - atomic_t gw_bandwidth; /* gw bandwidth */ - atomic_t orig_interval; /* uint */ - atomic_t hop_penalty; /* uint */ - atomic_t log_level; /* uint */ atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; char num_ifaces; - struct batadv_debug_log *debug_log; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -297,11 +507,15 @@ struct batadv_priv { spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ struct delayed_work orig_work; + struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *bat_algo_ops; #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla bla; #endif +#ifdef CONFIG_BATMAN_ADV_DEBUG + struct batadv_priv_debug_log *debug_log; +#endif struct batadv_priv_gw gw; struct batadv_priv_tt tt; struct batadv_priv_vis vis; @@ -310,21 +524,97 @@ struct batadv_priv { #endif }; +/** + * struct batadv_socket_client - layer2 icmp socket client data + * @queue_list: packet queue for packets destined for this socket client + * @queue_len: number of packets in the packet queue (queue_list) + * @index: socket client's index in the batadv_socket_client_hash + * @lock: lock protecting queue_list, queue_len & index + * @queue_wait: socket client's wait queue + * @bat_priv: pointer to soft_iface this client belongs to + */ struct batadv_socket_client { struct list_head queue_list; unsigned int queue_len; unsigned char index; - spinlock_t lock; /* protects queue_list, queue_len, index */ + spinlock_t lock; /* protects queue_list, queue_len & index */ wait_queue_head_t queue_wait; struct batadv_priv *bat_priv; }; +/** + * struct batadv_socket_packet - layer2 icmp packet for socket client + * @list: list node for batadv_socket_client::queue_list + * @icmp_len: size of the layer2 icmp packet + * @icmp_packet: layer2 icmp packet + */ struct batadv_socket_packet { struct list_head list; size_t icmp_len; struct batadv_icmp_packet_rr icmp_packet; }; +/** + * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN + * @orig: originator address of backbone node (mac address of primary iface) + * @vid: vlan id this gateway was detected on + * @hash_entry: hlist node for batadv_priv_bla::backbone_hash + * @bat_priv: pointer to soft_iface this backbone gateway belongs to + * @lasttime: last time we heard of this backbone gw + * @wait_periods: grace time for bridge forward delays and bla group forming at + * bootup phase - no bcast traffic is formwared until it has elapsed + * @request_sent: if this bool is set to true we are out of sync with this + * backbone gateway - no bcast traffic is formwared until the situation was + * resolved + * @crc: crc16 checksum over all claims + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +#ifdef CONFIG_BATMAN_ADV_BLA +struct batadv_bla_backbone_gw { + uint8_t orig[ETH_ALEN]; + short vid; + struct hlist_node hash_entry; + struct batadv_priv *bat_priv; + unsigned long lasttime; + atomic_t wait_periods; + atomic_t request_sent; + uint16_t crc; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_bla_claim - claimed non-mesh client structure + * @addr: mac address of claimed non-mesh client + * @vid: vlan id this client was detected on + * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client + * @lasttime: last time we heard of claim (locals only) + * @hash_entry: hlist node for batadv_priv_bla::claim_hash + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_bla_claim { + uint8_t addr[ETH_ALEN]; + short vid; + struct batadv_bla_backbone_gw *backbone_gw; + unsigned long lasttime; + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; +}; +#endif + +/** + * struct batadv_tt_common_entry - tt local & tt global common data + * @addr: mac address of non-mesh client + * @hash_entry: hlist node for batadv_priv_tt::local_hash or for + * batadv_priv_tt::global_hash + * @flags: various state handling flags (see batadv_tt_client_flags) + * @added_at: timestamp used for purging stale tt common entries + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_common_entry { uint8_t addr[ETH_ALEN]; struct hlist_node hash_entry; @@ -334,62 +624,76 @@ struct batadv_tt_common_entry { struct rcu_head rcu; }; +/** + * struct batadv_tt_local_entry - translation table local entry data + * @common: general translation table data + * @last_seen: timestamp used for purging stale tt local entries + */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; }; +/** + * struct batadv_tt_global_entry - translation table global entry data + * @common: general translation table data + * @orig_list: list of orig nodes announcing this non-mesh client + * @list_lock: lock protecting orig_list + * @roam_at: time at which TT_GLOBAL_ROAM was set + */ struct batadv_tt_global_entry { struct batadv_tt_common_entry common; struct hlist_head orig_list; - spinlock_t list_lock; /* protects the list */ - unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ + spinlock_t list_lock; /* protects orig_list */ + unsigned long roam_at; }; +/** + * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client + * @orig_node: pointer to orig node announcing this non-mesh client + * @ttvn: translation table version number which added the non-mesh client + * @list: list node for batadv_tt_global_entry::orig_list + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; uint8_t ttvn; - atomic_t refcount; - struct rcu_head rcu; struct hlist_node list; -}; - -#ifdef CONFIG_BATMAN_ADV_BLA -struct batadv_backbone_gw { - uint8_t orig[ETH_ALEN]; - short vid; /* used VLAN ID */ - struct hlist_node hash_entry; - struct batadv_priv *bat_priv; - unsigned long lasttime; /* last time we heard of this backbone gw */ - atomic_t wait_periods; - atomic_t request_sent; atomic_t refcount; struct rcu_head rcu; - uint16_t crc; /* crc checksum over all claims */ -}; - -struct batadv_claim { - uint8_t addr[ETH_ALEN]; - short vid; - struct batadv_backbone_gw *backbone_gw; - unsigned long lasttime; /* last time we heard of claim (locals only) */ - struct rcu_head rcu; - atomic_t refcount; - struct hlist_node hash_entry; }; -#endif +/** + * struct batadv_tt_change_node - structure for tt changes occured + * @list: list node for batadv_priv_tt::changes_list + * @change: holds the actual translation table diff data + */ struct batadv_tt_change_node { struct list_head list; struct batadv_tt_change change; }; +/** + * struct batadv_tt_req_node - data to keep track of the tt requests in flight + * @addr: mac address address of the originator this request was sent to + * @issued_at: timestamp used for purging stale tt requests + * @list: list node for batadv_priv_tt::req_list + */ struct batadv_tt_req_node { uint8_t addr[ETH_ALEN]; unsigned long issued_at; struct list_head list; }; +/** + * struct batadv_tt_roam_node - roaming client data + * @addr: mac address of the client in the roaming phase + * @counter: number of allowed roaming events per client within a single + * OGM interval (changes are committed with each OGM) + * @first_time: timestamp used for purging stale roaming node entries + * @list: list node for batadv_priv_tt::roam_list + */ struct batadv_tt_roam_node { uint8_t addr[ETH_ALEN]; atomic_t counter; @@ -397,8 +701,19 @@ struct batadv_tt_roam_node { struct list_head list; }; -/* forw_packet - structure for forw_list maintaining packets to be - * send/forwarded +/** + * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded + * @list: list node for batadv_socket_client::queue_list + * @send_time: execution time for delayed_work (packet sending) + * @own: bool for locally generated packets (local OGMs are re-scheduled after + * sending) + * @skb: bcast packet's skb buffer + * @packet_len: size of aggregated OGM packet inside the skb buffer + * @direct_link_flags: direct link flags for aggregated OGM packets + * @num_packets: counter for bcast packet retransmission + * @delayed_work: work queue callback item for packet sending + * @if_incoming: pointer incoming hard-iface or primary iface if locally + * generated packet */ struct batadv_forw_packet { struct hlist_node list; @@ -412,72 +727,98 @@ struct batadv_forw_packet { struct batadv_hard_iface *if_incoming; }; -/* While scanning for vis-entries of a particular vis-originator - * this list collects its interfaces to create a subgraph/cluster - * out of them later +/** + * struct batadv_frag_packet_list_entry - storage for fragment packet + * @list: list node for orig_node::frag_list + * @seqno: sequence number of the fragment + * @skb: fragment's skb buffer */ -struct batadv_if_list_entry { - uint8_t addr[ETH_ALEN]; - bool primary; - struct hlist_node list; -}; - -struct batadv_debug_log { - char log_buff[BATADV_LOG_BUF_LEN]; - unsigned long log_start; - unsigned long log_end; - spinlock_t lock; /* protects log_buff, log_start and log_end */ - wait_queue_head_t queue_wait; -}; - struct batadv_frag_packet_list_entry { struct list_head list; uint16_t seqno; struct sk_buff *skb; }; +/** + * struct batadv_vis_info - local data for vis information + * @first_seen: timestamp used for purging stale vis info entries + * @recv_list: List of server-neighbors we have received this packet from. This + * packet should not be re-forward to them again. List elements are struct + * batadv_vis_recvlist_node + * @send_list: list of packets to be forwarded + * @refcount: number of contexts the object is used + * @hash_entry: hlist node for batadv_priv_vis::hash + * @bat_priv: pointer to soft_iface this orig node belongs to + * @skb_packet: contains the vis packet + */ struct batadv_vis_info { unsigned long first_seen; - /* list of server-neighbors we received a vis-packet - * from. we should not reply to them. - */ struct list_head recv_list; struct list_head send_list; struct kref refcount; struct hlist_node hash_entry; struct batadv_priv *bat_priv; - /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; - /* vis_info may follow here */ } __packed; +/** + * struct batadv_vis_info_entry - contains link information for vis + * @src: source MAC of the link, all zero for local TT entry + * @dst: destination MAC of the link, client mac address for local TT entry + * @quality: transmission quality of the link, or 0 for local TT entry + */ struct batadv_vis_info_entry { uint8_t src[ETH_ALEN]; uint8_t dest[ETH_ALEN]; - uint8_t quality; /* quality = 0 client */ + uint8_t quality; } __packed; -struct batadv_recvlist_node { +/** + * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list + * @list: list node for batadv_vis_info::recv_list + * @mac: MAC address of the originator from where the vis_info was received + */ +struct batadv_vis_recvlist_node { struct list_head list; uint8_t mac[ETH_ALEN]; }; +/** + * struct batadv_vis_if_list_entry - auxiliary data for vis data generation + * @addr: MAC address of the interface + * @primary: true if this interface is the primary interface + * @list: list node the interface list + * + * While scanning for vis-entries of a particular vis-originator + * this list collects its interfaces to create a subgraph/cluster + * out of them later + */ +struct batadv_vis_if_list_entry { + uint8_t addr[ETH_ALEN]; + bool primary; + struct hlist_node list; +}; + +/** + * struct batadv_algo_ops - mesh algorithm callbacks + * @list: list node for the batadv_algo_list + * @name: name of the algorithm + * @bat_iface_enable: init routing info when hard-interface is enabled + * @bat_iface_disable: de-init routing info when hard-interface is disabled + * @bat_iface_update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + * @bat_primary_iface_set: called when primary interface is selected / changed + * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue + * @bat_ogm_emit: send scheduled OGM + */ struct batadv_algo_ops { struct hlist_node list; char *name; - /* init routing info when hard-interface is enabled */ int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); - /* de-init routing info when hard-interface is disabled */ void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); - /* (re-)init mac addresses of the protocol information - * belonging to this hard-interface - */ void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); - /* called when primary interface is selected / changed */ void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); - /* prepare a new outgoing OGM for the send queue */ void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); - /* send scheduled OGM */ void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 10aff49fcf2..50e079f00be 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Andreas Langer * @@ -133,7 +133,6 @@ batadv_frag_search_packet(struct list_head *head, is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); list_for_each_entry(tfp, head, list) { - if (!tfp->skb) continue; @@ -162,7 +161,6 @@ void batadv_frag_list_free(struct list_head *head) struct batadv_frag_packet_list_entry *pf, *tmp_pf; if (!list_empty(head)) { - list_for_each_entry_safe(pf, tmp_pf, head, list) { kfree_skb(pf->skb); list_del(&pf->list); diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 61abba58bd8..429cf8a4a31 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: * * Andreas Langer * diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 0f65a9de5f7..22d2785177d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -28,14 +28,15 @@ #define BATADV_MAX_VIS_PACKET_SIZE 1000 -static void batadv_start_vis_timer(struct batadv_priv *bat_priv); +/* hash class keys */ +static struct lock_class_key batadv_vis_hash_lock_class_key; /* free the info */ static void batadv_free_info(struct kref *ref) { struct batadv_vis_info *info; struct batadv_priv *bat_priv; - struct batadv_recvlist_node *entry, *tmp; + struct batadv_vis_recvlist_node *entry, *tmp; info = container_of(ref, struct batadv_vis_info, refcount); bat_priv = info->bat_priv; @@ -126,7 +127,7 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, struct hlist_head *if_list, bool primary) { - struct batadv_if_list_entry *entry; + struct batadv_vis_if_list_entry *entry; struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { @@ -146,7 +147,7 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, static void batadv_vis_data_read_prim_sec(struct seq_file *seq, const struct hlist_head *if_list) { - struct batadv_if_list_entry *entry; + struct batadv_vis_if_list_entry *entry; struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { @@ -196,7 +197,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, struct batadv_vis_info_entry *entries) { int i; - struct batadv_if_list_entry *entry; + struct batadv_vis_if_list_entry *entry; struct hlist_node *pos; hlist_for_each_entry(entry, pos, list, list) { @@ -222,7 +223,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, struct batadv_vis_packet *packet; uint8_t *entries_pos; struct batadv_vis_info_entry *entries; - struct batadv_if_list_entry *entry; + struct batadv_vis_if_list_entry *entry; struct hlist_node *pos, *n; HLIST_HEAD(vis_if_list); @@ -304,7 +305,7 @@ static void batadv_send_list_del(struct batadv_vis_info *info) static void batadv_recv_list_add(struct batadv_priv *bat_priv, struct list_head *recv_list, const char *mac) { - struct batadv_recvlist_node *entry; + struct batadv_vis_recvlist_node *entry; entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -321,7 +322,7 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, const struct list_head *recv_list, const char *mac) { - const struct batadv_recvlist_node *entry; + const struct batadv_vis_recvlist_node *entry; spin_lock_bh(&bat_priv->vis.list_lock); list_for_each_entry(entry, recv_list, list) { @@ -827,7 +828,9 @@ static void batadv_send_vis_packets(struct work_struct *work) kref_put(&info->refcount, batadv_free_info); } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); } /* init the vis server. this may only be called when if_list is already @@ -852,6 +855,9 @@ int batadv_vis_init(struct batadv_priv *bat_priv) goto err; } + batadv_hash_set_lock_class(bat_priv->vis.hash, + &batadv_vis_hash_lock_class_key); + bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); if (!bat_priv->vis.my_info) goto err; @@ -894,7 +900,11 @@ int batadv_vis_init(struct batadv_priv *bat_priv) } spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_start_vis_timer(bat_priv); + + INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); + queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, + msecs_to_jiffies(BATADV_VIS_INTERVAL)); + return 0; free_info: @@ -931,11 +941,3 @@ void batadv_vis_quit(struct batadv_priv *bat_priv) bat_priv->vis.my_info = NULL; spin_unlock_bh(&bat_priv->vis.hash_lock); } - -/* schedule packets for (re)transmission */ -static void batadv_start_vis_timer(struct batadv_priv *bat_priv) -{ - INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, - msecs_to_jiffies(BATADV_VIS_INTERVAL)); -} diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 873282fa86d..ad92b0e3c23 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2012 B.A.T.M.A.N. contributors: +/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 2f67d5ecc90..eb0f4b16ff0 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -290,7 +290,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb, goto done; } - mgr->state = READ_LOC_AMP_INFO; + set_bit(READ_LOC_AMP_INFO, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); done: @@ -499,8 +499,16 @@ send_rsp: if (hdev) hci_dev_put(hdev); - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, sizeof(rsp), - &rsp); + /* Reply error now and success after HCI Write Remote AMP Assoc + command complete with success status + */ + if (rsp.status != A2MP_STATUS_SUCCESS) { + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, + sizeof(rsp), &rsp); + } else { + set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state); + mgr->ident = hdr->ident; + } skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -840,7 +848,7 @@ struct amp_mgr *amp_mgr_lookup_by_state(u8 state) mutex_lock(&_mgr_list_lock); list_for_each_entry(mgr, &_mgr_list, list) { - if (mgr->state == state) { + if (test_and_clear_bit(state, &mgr->state)) { amp_mgr_get(mgr); mutex_unlock(&_mgr_list_lock); return mgr; @@ -949,6 +957,32 @@ clean: kfree(req); } +void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status) +{ + struct amp_mgr *mgr; + struct a2mp_physlink_rsp rsp; + struct hci_conn *hs_hcon; + + mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC); + if (!mgr) + return; + + hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT); + if (!hs_hcon) { + rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; + } else { + rsp.remote_id = hs_hcon->remote_id; + rsp.status = A2MP_STATUS_SUCCESS; + } + + BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon, + status); + + rsp.local_id = hdev->id; + a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp); + amp_mgr_put(mgr); +} + void a2mp_discover_amp(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5355df63d39..d3ee69b35a7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -641,7 +641,7 @@ int bt_procfs_init(struct module* module, struct net *net, const char *name, sk_list->fops.llseek = seq_lseek; sk_list->fops.release = seq_release_private; - pde = proc_net_fops_create(net, name, 0, &sk_list->fops); + pde = proc_create(name, 0, net->proc_net, &sk_list->fops); if (!pde) return -ENOMEM; @@ -652,7 +652,7 @@ int bt_procfs_init(struct module* module, struct net *net, const char *name, void bt_procfs_cleanup(struct net *net, const char *name) { - proc_net_remove(net, name); + remove_proc_entry(name, net->proc_net); } #else int bt_procfs_init(struct module* module, struct net *net, const char *name, diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 1b0d92c0643..d459ed43c77 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -236,7 +236,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC; + set_bit(READ_LOC_AMP_ASSOC, &mgr->state); hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); } @@ -250,7 +250,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - mgr->state = READ_LOC_AMP_ASSOC_FINAL; + set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state); /* Read Local AMP Assoc final link information data */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); @@ -317,7 +317,9 @@ void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle) if (!hcon) return; - amp_write_rem_assoc_frag(hdev, hcon); + /* Send A2MP create phylink rsp when all fragments are written */ + if (amp_write_rem_assoc_frag(hdev, hcon)) + a2mp_send_create_phy_link_rsp(hdev, 0); } void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle) @@ -403,26 +405,20 @@ void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon) void amp_create_logical_link(struct l2cap_chan *chan) { + struct hci_conn *hs_hcon = chan->hs_hcon; struct hci_cp_create_accept_logical_link cp; - struct hci_conn *hcon; struct hci_dev *hdev; - BT_DBG("chan %p", chan); + BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon, chan->conn->dst); - if (!chan->hs_hcon) + if (!hs_hcon) return; hdev = hci_dev_hold(chan->hs_hcon->hdev); if (!hdev) return; - BT_DBG("chan %p dst %pMR", chan, chan->conn->dst); - - hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, chan->conn->dst); - if (!hcon) - goto done; - - cp.phy_handle = hcon->handle; + cp.phy_handle = hs_hcon->handle; cp.tx_flow_spec.id = chan->local_id; cp.tx_flow_spec.stype = chan->local_stype; @@ -438,14 +434,13 @@ void amp_create_logical_link(struct l2cap_chan *chan) cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat); cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to); - if (hcon->out) + if (hs_hcon->out) hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp), &cp); else hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp), &cp); -done: hci_dev_put(hdev); } diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a5b63970263..e430b1abcd2 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -33,7 +33,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#include <net/bluetooth/l2cap.h> #include "bnep.h" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0f78e34220c..22e77a78654 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,8 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + queue_delayed_work(hdev->req_workqueue, &hdev->power_off, + HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -1182,14 +1183,10 @@ static void hci_discov_off(struct work_struct *work) int hci_uuids_clear(struct hci_dev *hdev) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *uuid; + struct bt_uuid *uuid, *tmp; - uuid = list_entry(p, struct bt_uuid, list); - - list_del(p); + list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { + list_del(&uuid->list); kfree(uuid); } @@ -1621,8 +1618,8 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, if (err < 0) return err; - schedule_delayed_work(&hdev->le_scan_disable, - msecs_to_jiffies(timeout)); + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + msecs_to_jiffies(timeout)); return 0; } @@ -1799,6 +1796,15 @@ int hci_register_dev(struct hci_dev *hdev) goto err; } + hdev->req_workqueue = alloc_workqueue(hdev->name, + WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!hdev->req_workqueue) { + destroy_workqueue(hdev->workqueue); + error = -ENOMEM; + goto err; + } + error = hci_add_sysfs(hdev); if (error < 0) goto err_wqueue; @@ -1821,12 +1827,13 @@ int hci_register_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); return id; err_wqueue: destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); write_lock(&hci_dev_list_lock); @@ -1880,6 +1887,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_del_sysfs(hdev); destroy_workqueue(hdev->workqueue); + destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); hci_blacklist_clear(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 81b44481d0d..477726a6351 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -609,8 +609,17 @@ static void le_setup(struct hci_dev *hdev) /* Read LE Buffer Size */ hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + /* Read LE Local Supported Features */ + hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + /* Read LE Advertising Channel TX Power */ hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + + /* Read LE White List Size */ + hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + + /* Read LE Supported States */ + hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); } static void hci_setup(struct hci_dev *hdev) @@ -1090,6 +1099,19 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } +static void hci_cc_le_read_local_features(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_local_features *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_features, rp->features, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status); +} + static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1290,6 +1312,19 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } } +static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_white_list_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); + + if (!rp->status) + hdev->le_white_list_size = rp->size; + + hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); +} + static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_le_ltk_reply *rp = (void *) skb->data; @@ -1314,6 +1349,19 @@ static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status); } +static void hci_cc_le_read_supported_states(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_supported_states *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (!rp->status) + memcpy(hdev->le_states, rp->le_states, 8); + + hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2628,6 +2676,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_buffer_size(hdev, skb); break; + case HCI_OP_LE_READ_LOCAL_FEATURES: + hci_cc_le_read_local_features(hdev, skb); + break; + case HCI_OP_LE_READ_ADV_TX_POWER: hci_cc_le_read_adv_tx_power(hdev, skb); break; @@ -2664,6 +2716,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_set_scan_enable(hdev, skb); break; + case HCI_OP_LE_READ_WHITE_LIST_SIZE: + hci_cc_le_read_white_list_size(hdev, skb); + break; + case HCI_OP_LE_LTK_REPLY: hci_cc_le_ltk_reply(hdev, skb); break; @@ -2672,6 +2728,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_ltk_neg_reply(hdev, skb); break; + case HCI_OP_LE_READ_SUPPORTED_STATES: + hci_cc_le_read_supported_states(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -3928,8 +3988,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) void *ptr = &skb->data[1]; s8 rssi; - hci_dev_lock(hdev); - while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; @@ -3939,8 +3997,6 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) ptr += sizeof(*ev) + ev->length + 1; } - - hci_dev_unlock(hdev); } static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 55cceee02a8..23b4e242a31 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 @@ #include <linux/debugfs.h> #include <linux/module.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -461,19 +462,18 @@ static const struct file_operations blacklist_fops = { static void print_bt_uuid(struct seq_file *f, u8 *uuid) { - __be32 data0, data4; - __be16 data1, data2, data3, data5; + u32 data0, data5; + u16 data1, data2, data3, data4; - memcpy(&data0, &uuid[0], 4); - memcpy(&data1, &uuid[4], 2); - memcpy(&data2, &uuid[6], 2); - memcpy(&data3, &uuid[8], 2); - memcpy(&data4, &uuid[10], 4); - memcpy(&data5, &uuid[14], 2); + data5 = get_unaligned_le32(uuid); + data4 = get_unaligned_le16(uuid + 4); + data3 = get_unaligned_le16(uuid + 6); + data2 = get_unaligned_le16(uuid + 8); + data1 = get_unaligned_le16(uuid + 10); + data0 = get_unaligned_le32(uuid + 12); - seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", - ntohl(data0), ntohs(data1), ntohs(data2), ntohs(data3), - ntohl(data4), ntohs(data5)); + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.4x%.8x\n", + data0, data1, data2, data3, data4, data5); } static int uuids_show(struct seq_file *f, void *p) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 22e65832284..7c7e9321f1e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1527,17 +1527,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); switch (hcon->type) { - case AMP_LINK: - conn->mtu = hcon->hdev->block_mtu; - break; - case LE_LINK: if (hcon->hdev->le_mtu) { conn->mtu = hcon->hdev->le_mtu; break; } /* fall through */ - default: conn->mtu = hcon->hdev->acl_mtu; break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f559b966279..39395c7144a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -35,7 +35,7 @@ bool enable_hs; #define MGMT_VERSION 1 -#define MGMT_REVISION 2 +#define MGMT_REVISION 3 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -435,35 +435,117 @@ static u32 get_current_settings(struct hci_dev *hdev) #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 bluetooth_base_uuid[] = { - 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; -static u16 get_uuid16(u8 *uuid128) + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) { - u32 val; - int i; + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; - for (i = 0; i < 12; i++) { - if (bluetooth_base_uuid[i] != uuid128[i]) - return 0; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); } - val = get_unaligned_le32(&uuid128[12]); - if (val > 0xffff) - return 0; + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; - return (u16) val; + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; } static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; - u16 eir_len = 0; - u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; - int i, truncated = 0; - struct bt_uuid *uuid; size_t name_len; name_len = strlen(hdev->dev_name); @@ -481,7 +563,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) memcpy(ptr + 2, hdev->dev_name, name_len); - eir_len += (name_len + 2); ptr += (name_len + 2); } @@ -490,7 +571,6 @@ static void create_eir(struct hci_dev *hdev, u8 *data) ptr[1] = EIR_TX_POWER; ptr[2] = (u8) hdev->inq_tx_power; - eir_len += 3; ptr += 3; } @@ -503,60 +583,12 @@ static void create_eir(struct hci_dev *hdev, u8 *data) put_unaligned_le16(hdev->devid_product, ptr + 6); put_unaligned_le16(hdev->devid_version, ptr + 8); - eir_len += 10; ptr += 10; } - memset(uuid16_list, 0, sizeof(uuid16_list)); - - /* Group all UUID16 types */ - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - uuid16 = get_uuid16(uuid->uuid); - if (uuid16 == 0) - return; - - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - /* Stop if not enough space to put next UUID */ - if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { - truncated = 1; - break; - } - - /* Check for duplicates */ - for (i = 0; uuid16_list[i] != 0; i++) - if (uuid16_list[i] == uuid16) - break; - - if (uuid16_list[i] == 0) { - uuid16_list[i] = uuid16; - eir_len += sizeof(u16); - } - } - - if (uuid16_list[0] != 0) { - u8 *length = ptr; - - /* EIR Data type */ - ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL; - - ptr += 2; - eir_len += 2; - - for (i = 0; uuid16_list[i] != 0; i++) { - *ptr++ = (uuid16_list[i] & 0x00ff); - *ptr++ = (uuid16_list[i] & 0xff00) >> 8; - } - - /* EIR Data length */ - *length = (i * sizeof(u16)) + 1; - } + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } static int update_eir(struct hci_dev *hdev) @@ -728,13 +760,9 @@ static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void *data), void *data) { - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->mgmt_pending) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); + struct pending_cmd *cmd, *tmp; + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; @@ -777,14 +805,19 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { cancel_delayed_work(&hdev->power_off); if (cp->val) { - err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); - mgmt_powered(hdev, 1); + mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, + data, len); + err = mgmt_powered(hdev, 1); goto failed; } } @@ -807,9 +840,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, } if (cp->val) - schedule_work(&hdev->power_on); + queue_work(hdev->req_workqueue, &hdev->power_on); else - schedule_work(&hdev->power_off.work); + queue_work(hdev->req_workqueue, &hdev->power_off.work); err = 0; @@ -872,6 +905,10 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); + timeout = __le16_to_cpu(cp->timeout); if (!cp->val && timeout > 0) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, @@ -971,6 +1008,10 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1041,6 +1082,10 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (cp->val) @@ -1073,6 +1118,10 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -1133,13 +1182,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_ssp_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_ssp_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, - MGMT_STATUS_NOT_SUPPORTED); - goto failed; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; @@ -1199,6 +1250,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_INVALID_PARAMS); + if (cp->val) set_bit(HCI_HS_ENABLED, &hdev->dev_flags); else @@ -1217,13 +1272,15 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_NOT_SUPPORTED); - if (!lmp_le_capable(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, - MGMT_STATUS_NOT_SUPPORTED); - goto unlock; - } + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); val = !!cp->val; enabled = lmp_host_le_capable(hdev); @@ -1275,6 +1332,25 @@ unlock: return err; } +static const u8 bluetooth_base_uuid[] = { + 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static u8 get_uuid_size(const u8 *uuid) +{ + u32 val; + + if (memcmp(uuid, bluetooth_base_uuid, 12)) + return 128; + + val = get_unaligned_le32(&uuid[12]); + if (val > 0xffff) + return 32; + + return 16; +} + static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_uuid *cp = data; @@ -1300,8 +1376,9 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) memcpy(uuid->uuid, cp->uuid, 16); uuid->svc_hint = cp->svc_hint; + uuid->size = get_uuid_size(cp->uuid); - list_add(&uuid->list, &hdev->uuids); + list_add_tail(&uuid->list, &hdev->uuids); err = update_class(hdev); if (err < 0) @@ -1332,7 +1409,8 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { - schedule_delayed_work(&hdev->service_cache, CACHE_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->service_cache, + CACHE_TIMEOUT); return true; } @@ -1344,7 +1422,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_remove_uuid *cp = data; struct pending_cmd *cmd; - struct list_head *p, *n; + struct bt_uuid *match, *tmp; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int err, found; @@ -1372,9 +1450,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, found = 0; - list_for_each_safe(p, n, &hdev->uuids) { - struct bt_uuid *match = list_entry(p, struct bt_uuid, list); - + list_for_each_entry_safe(match, tmp, &hdev->uuids, list) { if (memcmp(match->uuid, cp->uuid, 16) != 0) continue; @@ -1422,13 +1498,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); - hci_dev_lock(hdev); + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_NOT_SUPPORTED); - if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, - MGMT_STATUS_BUSY); - goto unlock; - } + if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_BUSY); + + if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) + return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); hdev->major_class = cp->major; hdev->minor_class = cp->minor; @@ -1483,9 +1565,21 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } + if (cp->debug_keys != 0x00 && cp->debug_keys != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_link_key_info *key = &cp->keys[i]; + + if (key->addr.type != BDADDR_BREDR) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -1533,12 +1627,22 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - hci_dev_lock(hdev); - memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); rp.addr.type = cp->addr.type; + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + if (cp->disconnect != 0x00 && cp->disconnect != 0x01) + return cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + if (!hdev_is_powered(hdev)) { err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); @@ -1596,6 +1700,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; + struct mgmt_rp_disconnect rp; struct hci_cp_disconnect dc; struct pending_cmd *cmd; struct hci_conn *conn; @@ -1603,17 +1708,26 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto failed; } if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY, &rp, sizeof(rp)); goto failed; } @@ -1624,8 +1738,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = cmd_status(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED); + err = cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); goto failed; } @@ -1903,11 +2017,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_PAIR_DEVICE, - MGMT_STATUS_NOT_POWERED); + err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); goto unlock; } @@ -1924,10 +2047,6 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, cp->addr.type, sec_level, auth_type); - memset(&rp, 0, sizeof(rp)); - bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); - rp.addr.type = cp->addr.type; - if (IS_ERR(conn)) { int status; @@ -2254,24 +2373,16 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->hash, cp->randomizer); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2287,24 +2398,15 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev)) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_REMOTE_OOB_DATA, - MGMT_STATUS_NOT_POWERED, &cp->addr, - sizeof(cp->addr)); - goto unlock; - } - err = hci_remove_remote_oob_data(hdev, &cp->addr.bdaddr); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); -unlock: hci_dev_unlock(hdev); return err; } @@ -2365,31 +2467,45 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - if (lmp_bredr_capable(hdev)) - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); - else - err = -ENOTSUPP; + if (!lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); break; case DISCOV_TYPE_LE: - if (lmp_host_le_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, + LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); break; case DISCOV_TYPE_INTERLEAVED: - if (lmp_host_le_capable(hdev) && lmp_bredr_capable(hdev)) - err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, - LE_SCAN_WIN, - LE_SCAN_TIMEOUT_BREDR_LE); - else - err = -ENOTSUPP; + if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_SUPPORTED); + mgmt_pending_remove(cmd); + goto failed; + } + + err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, + LE_SCAN_TIMEOUT_BREDR_LE); break; default: - err = -EINVAL; + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); + goto failed; } if (err < 0) @@ -2510,7 +2626,8 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_inquiry_cache_update_resolve(hdev, e); } - err = 0; + err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME, 0, &cp->addr, + sizeof(cp->addr)); failed: hci_dev_unlock(hdev); @@ -2526,13 +2643,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_FAILED; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2551,13 +2673,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + hci_dev_lock(hdev); err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type); if (err < 0) status = MGMT_STATUS_INVALID_PARAMS; else - status = 0; + status = MGMT_STATUS_SUCCESS; err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -2612,6 +2739,10 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + if (!hdev_is_powered(hdev)) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_POWERED); @@ -2659,12 +2790,23 @@ done: return err; } +static bool ltk_is_valid(struct mgmt_ltk_info *key) +{ + if (key->authenticated != 0x00 && key->authenticated != 0x01) + return false; + if (key->master != 0x00 && key->master != 0x01) + return false; + if (!bdaddr_type_is_le(key->addr.type)) + return false; + return true; +} + static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; u16 key_count, expected_len; - int i; + int i, err; key_count = __le16_to_cpu(cp->key_count); @@ -2674,11 +2816,20 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, BT_ERR("load_keys: expected %u bytes, got %u bytes", len, expected_len); return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); } BT_DBG("%s key_count %u", hdev->name, key_count); + for (i = 0; i < key_count; i++) { + struct mgmt_ltk_info *key = &cp->keys[i]; + + if (!ltk_is_valid(key)) + return cmd_status(sk, hdev->id, + MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } + hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -2698,9 +2849,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key->enc_size, key->ediv, key->rand); } + err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, + NULL, 0); + hci_dev_unlock(hdev); - return 0; + return err; } static const struct mgmt_handler { @@ -2915,6 +3069,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); if (powered) { + u8 link_sec; + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { u8 ssp = 1; @@ -2938,6 +3094,11 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) sizeof(cp), &cp); } + link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + if (lmp_bredr_capable(hdev)) { set_bredr_scan(hdev); update_class(hdev); @@ -2946,7 +3107,13 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) } } else { u8 status = MGMT_STATUS_NOT_POWERED; + u8 zero_cod[] = { 0, 0, 0 }; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), NULL); } err = new_settings(hdev, match.sk); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 201fdf73720..b23e2713fea 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -257,7 +257,7 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s) { BT_DBG("session %p state %ld", s, s->state); - if (timer_pending(&s->timer) && del_timer(&s->timer)) + if (del_timer(&s->timer)) rfcomm_session_put(s); } @@ -285,7 +285,7 @@ static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); - if (timer_pending(&d->timer) && del_timer(&d->timer)) + if (del_timer(&d->timer)) rfcomm_dlc_put(d); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e3..b5178d62064 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -900,8 +900,6 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); - sco_conn_lock(conn); - if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); @@ -909,9 +907,13 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state_change(sk); bh_unlock_sock(sk); } else { + sco_conn_lock(conn); + parent = sco_get_sock_listen(conn->src); - if (!parent) - goto done; + if (!parent) { + sco_conn_unlock(conn); + return; + } bh_lock_sock(parent); @@ -919,7 +921,8 @@ static void sco_conn_ready(struct sco_conn *conn) BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); - goto done; + sco_conn_unlock(conn); + return; } sco_sock_init(sk, parent); @@ -939,10 +942,9 @@ static void sco_conn_ready(struct sco_conn *conn) parent->sk_data_ready(parent, 1); bh_unlock_sock(parent); - } -done: - sco_conn_unlock(conn); + sco_conn_unlock(conn); + } } /* ----- SCO interface with lower layer (HCI) ----- */ diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 6dee7bf648a..aa0d3b2f1bb 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -46,3 +46,17 @@ config BRIDGE_IGMP_SNOOPING Say N to exclude this support and reduce the binary size. If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + receive and forward traffic based on VLAN information in the packet + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e859098f5ee..e85498b2f16 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -14,4 +14,6 @@ bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7c78e264019..d5f1d3fd4b2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u16 vid = 0; rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER @@ -45,6 +46,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); @@ -67,7 +71,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); - } else if ((dst = __br_fdb_get(br, dest)) != NULL) + } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); @@ -172,12 +176,10 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { - dev->addr_assign_type &= ~NET_ADDR_RANDOM; memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); br_fdb_change_mac_address(br, addr->sa_data); br_stp_change_bridge_id(br, addr->sa_data); } - br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -185,10 +187,10 @@ static int br_set_mac_address(struct net_device *dev, void *p) static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "bridge"); - strcpy(info->version, BR_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "N/A"); + strlcpy(info->driver, "bridge", sizeof(info->driver)); + strlcpy(info->version, BR_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); } static netdev_features_t br_fix_features(struct net_device *dev, @@ -267,7 +269,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } #endif @@ -315,6 +317,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9576e6de2b..8117900af4d 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,11 +23,12 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <asm/unaligned.h> +#include <linux/if_vlan.h> #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); @@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static inline int br_mac_hash(const unsigned char *mac) +static inline int br_mac_hash(const unsigned char *mac, __u16 vid) { - /* use 1 byte of OUI cnd 3 bytes of NIC */ + /* use 1 byte of OUI and 3 bytes of NIC */ u32 key = get_unaligned((u32 *)(mac + 2)); - return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1); + return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); } static void fdb_rcu_free(struct rcu_head *head) @@ -91,6 +92,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; + bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false; int i; spin_lock_bh(&br->hash_lock); @@ -105,10 +107,12 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) if (f->dst == p && f->is_local) { /* maybe another port has same hw addr? */ struct net_bridge_port *op; + u16 vid = f->vlan_id; list_for_each_entry(op, &br->port_list, list) { if (op != p && ether_addr_equal(op->dev->dev_addr, - f->addr.addr)) { + f->addr.addr) && + nbp_vlan_find(op, vid)) { f->dst = op; goto insert; } @@ -116,27 +120,55 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) /* delete old one */ fdb_delete(br, f); - goto insert; +insert: + /* insert new address, may fail if invalid + * address or dup. + */ + fdb_insert(br, p, newaddr, vid); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (no_vlan) + goto done; } } } - insert: - /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr); +done: spin_unlock_bh(&br->hash_lock); } void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { struct net_bridge_fdb_entry *f; + struct net_port_vlans *pv; + u16 vid = 0; /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr); + f = __br_fdb_get(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst) fdb_delete(br, f); - fdb_insert(br, NULL, newaddr); + fdb_insert(br, NULL, newaddr, 0); + + /* Now remove and add entries for every VLAN configured on the + * bridge. This function runs under RTNL so the bitmap will not + * change from under us. + */ + pv = br_get_vlan_info(br); + if (!pv) + return; + + for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { + f = __br_fdb_get(br, br->dev->dev_addr, vid); + if (f && f->is_local && !f->dst) + fdb_delete(br, f); + fdb_insert(br, NULL, newaddr, vid); + } } void br_fdb_cleanup(unsigned long _data) @@ -231,13 +263,16 @@ void br_fdb_delete_by_port(struct net_bridge *br, /* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) { + hlist_for_each_entry_rcu(fdb, h, + &br->hash[br_mac_hash(addr, vid)], hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -261,7 +296,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr); + fdb = __br_fdb_get(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -325,26 +360,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, } static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; hlist_for_each_entry(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; } static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr)) + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; @@ -352,7 +391,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { struct net_bridge_fdb_entry *fdb; @@ -360,6 +400,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; + fdb->vlan_id = vid; fdb->is_local = 0; fdb->is_static = 0; fdb->updated = fdb->used = jiffies; @@ -369,15 +410,15 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -390,7 +431,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; @@ -401,20 +442,20 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, /* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr); + ret = fdb_insert(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; /* some users want to always flood. */ @@ -426,7 +467,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find_rcu(head, addr); + fdb = fdb_find_rcu(head, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -441,8 +482,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr))) { - fdb = fdb_create(head, source, addr); + if (likely(!fdb_find(head, addr, vid))) { + fdb = fdb_create(head, source, addr, vid); if (fdb) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -495,6 +536,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -506,6 +551,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -571,18 +617,18 @@ out: /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags) + __u16 state, __u16 flags, __u16 vid) { struct net_bridge *br = source->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr); + fdb = fdb_create(head, source, addr, vid); if (!fdb) return -ENOMEM; fdb_notify(br, fdb, RTM_NEWNEIGH); @@ -607,6 +653,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -614,12 +679,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -627,40 +709,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -668,9 +800,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 02015a505d2..092b20e4ee4 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,6 +31,7 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && p->state == BR_STATE_FORWARDING); } @@ -63,6 +64,10 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { @@ -88,6 +93,10 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + indev = skb->dev; skb->dev = to->dev; skb_forward_csum(skb); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 37fe693471a..ef1b91431c6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include <linux/if_ether.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/if_vlan.h> #include "br_private.h" @@ -66,14 +67,14 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_carrier_ok(dev)) + if (netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) return; spin_lock_bh(&br->lock); - if (netif_running(dev) && netif_carrier_ok(dev)) { + if (netif_running(dev) && netif_oper_up(dev)) { if (p->state == BR_STATE_DISABLED) br_stp_enable_port(p); } else { @@ -139,6 +140,7 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); list_del_rcu(&p->list); @@ -148,7 +150,7 @@ static void del_nbp(struct net_bridge_port *p) netdev_rx_handler_unregister(dev); synchronize_net(); - netdev_set_master(dev, NULL); + netdev_upper_dev_unlink(dev, br->dev); br_multicast_del_port(p); @@ -364,7 +366,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; - err = netdev_set_master(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev); if (err) goto err4; @@ -383,7 +385,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); - if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && + if (netif_running(dev) && netif_oper_up(dev) && (br->dev->flags & IFF_UP)) br_stp_enable_port(p); spin_unlock_bh(&br->lock); @@ -395,7 +397,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); - if (br_fdb_insert(br, p, dev->dev_addr)) + if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -403,7 +405,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; err5: - netdev_set_master(dev, NULL); + netdev_upper_dev_unlink(dev, br->dev); err4: br_netpoll_disable(p); err3: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4b34207419b..48033015189 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -17,6 +17,7 @@ #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> #include <linux/export.h> +#include <linux/rculist.h> #include "br_private.h" /* Hook for brouter */ @@ -34,6 +35,20 @@ static int br_pass_frame_up(struct sk_buff *skb) brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc modue when someone + * may be running packet capture. + */ + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + skb = br_handle_vlan(br, br_get_vlan_info(br), skb); + if (!skb) + return NET_RX_DROP; + indev = skb->dev; skb->dev = brdev; @@ -50,13 +65,17 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) goto drop; + if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -91,7 +110,8 @@ int br_handle_frame_finish(struct sk_buff *skb) skb2 = skb; br->dev->stats.multicast++; - } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + } else if ((dst = __br_fdb_get(br, dest, vid)) && + dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. */ skb = NULL; @@ -119,8 +139,10 @@ drop: static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_vlan_get_tag(skb, &vid); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index acc9f4cc18f..38991e03646 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -272,9 +272,6 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); if (err < 0) return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6d6f26531de..7d886b0a8b7 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -39,6 +39,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) return 0; + if (a->vid != b->vid) + return 0; switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; @@ -50,16 +52,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) return 0; } -static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip, + __u16 vid) { - return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); + return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1); } #if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, - const struct in6_addr *ip) + const struct in6_addr *ip, + __u16 vid) { - return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); + return jhash_2words(ipv6_addr_hash(ip), vid, + mdb->secret) & (mdb->max - 1); } #endif @@ -68,10 +73,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, { switch (ip->proto) { case htons(ETH_P_IP): - return __br_ip4_hash(mdb, ip->u.ip4); + return __br_ip4_hash(mdb, ip->u.ip4, ip->vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return __br_ip6_hash(mdb, &ip->u.ip6); + return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid); #endif } return 0; @@ -101,24 +106,27 @@ struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, } static struct net_bridge_mdb_entry *br_mdb_ip4_get( - struct net_bridge_mdb_htable *mdb, __be32 dst) + struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid) { struct br_ip br_dst; br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( - struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst, + __u16 vid) { struct br_ip br_dst; br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; return br_mdb_ip_get(mdb, &br_dst); } @@ -694,7 +702,8 @@ err: static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -703,6 +712,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -710,7 +720,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -719,6 +730,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; return br_multicast_add_group(br, port, &br_group); } @@ -895,10 +907,12 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; + br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -930,7 +944,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } - err = br_ip4_multicast_add_group(br, port, group); + err = br_ip4_multicast_add_group(br, port, group, vid); if (err) break; } @@ -949,10 +963,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; + br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -990,7 +1006,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } - err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, + vid); if (!err) break; } @@ -1074,6 +1091,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1108,7 +1126,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1149,6 +1168,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; + u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1180,7 +1200,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); + br_vlan_get_tag(skb, &vid); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1286,7 +1307,8 @@ out: static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - __be32 group) + __be32 group, + __u16 vid) { struct br_ip br_group; @@ -1295,6 +1317,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1302,7 +1325,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - const struct in6_addr *group) + const struct in6_addr *group, + __u16 vid) { struct br_ip br_group; @@ -1311,6 +1335,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; br_multicast_leave_group(br, port, &br_group); } @@ -1326,6 +1351,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; + u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1386,6 +1412,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1393,7 +1420,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group); + err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb2); @@ -1402,7 +1429,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group); + br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } @@ -1427,6 +1454,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; + u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1510,6 +1538,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; + br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1522,7 +1551,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, } mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; } case ICMPV6_MLD2_REPORT: @@ -1539,7 +1568,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5dc66abcc9e..27aa3ee517c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <uapi/linux/if_bridge.h> #include "br_private.h" #include "br_private_stp.h" @@ -64,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -98,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -107,6 +114,48 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + u16 pvid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + pvid = br_get_pvid(pv); + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -119,10 +168,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -130,7 +183,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -144,24 +197,85 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } +static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -181,8 +295,11 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) if (p->br->stp_enabled == BR_KERNEL_STP) return -EBUSY; + /* if device is not up, change is not allowed + * if link is not present, only allowable state is disabled + */ if (!netif_running(p->dev) || - (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED)) + (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) return -ENETDOWN; p->state = state; @@ -238,6 +355,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -245,38 +363,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { @@ -289,6 +445,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +static struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -302,11 +481,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -315,5 +501,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index a76b6213555..1644b3e1f94 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -82,7 +82,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_UP: - if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) { + if (netif_running(br->dev) && netif_oper_up(dev)) { spin_lock_bh(&br->lock); br_stp_enable_port(p); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 711094aed41..6d314c4e6bc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include <linux/netpoll.h> #include <linux/u64_stats_sync.h> #include <net/route.h> +#include <linux/if_vlan.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -26,6 +27,7 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -61,6 +63,20 @@ struct br_ip #endif } u; __be16 proto; + __u16 vid; +}; + +struct net_port_vlans { + u16 port_idx; + u16 pvid; + union { + struct net_bridge_port *port; + struct net_bridge *br; + } parent; + struct rcu_head rcu; + unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -74,6 +90,7 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local; unsigned char is_static; + __u16 vlan_id; }; struct net_bridge_port_group { @@ -156,6 +173,9 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_port_vlans __rcu *vlan_info; +#endif }; #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) @@ -197,9 +217,6 @@ struct net_bridge bool nf_call_ip6tables; bool nf_call_arptables; #endif - unsigned long flags; -#define BR_SET_MAC_ADDR 0x00000001 - u16 group_fwd_mask; /* STP */ @@ -260,6 +277,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + struct net_port_vlans __rcu *vlan_info; +#endif }; struct br_input_skb_cb { @@ -355,18 +376,22 @@ extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, int do_all); extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr); + const unsigned char *addr, + __u16 vid); extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); extern int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); +extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -534,6 +559,142 @@ static inline void br_mdb_uninit(void) } #endif +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +extern bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb); +extern struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +extern int br_vlan_delete(struct net_bridge *br, u16 vid); +extern void br_vlan_flush(struct net_bridge *br); +extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +extern void nbp_vlan_flush(struct net_bridge_port *port); +extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return rcu_dereference_rtnl(br->vlan_info); +} + +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return rcu_dereference_rtnl(p->vlan_info); +} + +/* Since bridge now depends on 8021Q module, but the time bridge sees the + * skb, the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (vlan_tx_tag_present(skb)) + *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if + * vid wasn't set + */ + smp_rmb(); + return (v->pvid & VLAN_TAG_PRESENT) ? + (v->pvid & ~VLAN_TAG_PRESENT) : + VLAN_N_VID; +} + +#else +static inline bool br_allowed_ingress(struct net_bridge *br, + struct net_port_vlans *v, + struct sk_buff *skb, + u16 *vid) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return NULL; +} +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + return false; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + return VLAN_N_VID; /* Returns invalid vid */ +} +#endif + /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); @@ -594,8 +755,9 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9d5a414a394..0bdb4ebd362 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -54,7 +54,7 @@ void br_stp_enable_bridge(struct net_bridge *br) br_config_bpdu_generation(br); list_for_each_entry(p, &br->port_list, list) { - if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev)) + if (netif_running(p->dev) && netif_oper_up(p->dev)) br_stp_enable_port(p); } @@ -216,7 +216,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) struct net_bridge_port *p; /* user has chosen a value so keep it */ - if (br->flags & BR_SET_MAC_ADDR) + if (br->dev->addr_assign_type == NET_ADDR_SET) return false; list_for_each_entry(p, &br->port_list, list) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 5913a3a0047..8baa9c08e1a 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -691,6 +691,24 @@ static ssize_t store_nf_call_arptables( static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, show_nf_call_arptables, store_nf_call_arptables); #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t show_vlan_filtering(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->vlan_enabled); +} + +static ssize_t store_vlan_filtering(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, + show_vlan_filtering, store_vlan_filtering); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -732,6 +750,9 @@ static struct attribute *bridge_attrs[] = { &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, +#endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 00000000000..93dde75923f --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,415 @@ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "br_private.h" + +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +{ + struct net_bridge_port *p = NULL; + struct net_bridge *br; + struct net_device *dev; + int err; + + if (test_bit(vid, v->vlan_bitmap)) { + __vlan_add_flags(v, vid, flags); + return 0; + } + + if (vid) { + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); + if (err) + return err; + } + + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; + } + + } + + set_bit(vid, v->vlan_bitmap); + v->num_vlans++; + __vlan_add_flags(v, vid, flags); + + return 0; + +out_filt: + if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + return err; +} + +static int __vlan_del(struct net_port_vlans *v, u16 vid) +{ + if (!test_bit(vid, v->vlan_bitmap)) + return -EINVAL; + + __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); + + if (v->port_idx && vid) { + struct net_device *dev = v->parent.port->dev; + + if (dev->features & NETIF_F_HW_VLAN_FILTER) + dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + } + + clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; + if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); + } + return 0; +} + +static void __vlan_flush(struct net_port_vlans *v) +{ + smp_wmb(); + v->pvid = 0; + bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + if (v->port_idx) + rcu_assign_pointer(v->parent.port->vlan_info, NULL); + else + rcu_assign_pointer(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); +} + +/* Strip the tag from the packet. Will return skb with tci set 0. */ +static struct sk_buff *br_vlan_untag(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_8021Q)) { + skb->vlan_tci = 0; + return skb; + } + + skb->vlan_tci = 0; + skb = vlan_untag(skb); + if (skb) + skb->vlan_tci = 0; + + return skb; +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *pv, + struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + goto out; + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id is set in the untagged bitmap, + * send untagged; otherwise, send taged. + */ + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, pv->untagged_bitmap)) + skb = br_vlan_untag(skb); + else { + /* Egress policy says "send tagged". If output device + * is the bridge, we need to add the VLAN header + * ourselves since we'll be going through the RX path. + * Sending to ports puts the frame on the TX path and + * we let dev_hard_start_xmit() add the header. + */ + if (skb->protocol != htons(ETH_P_8021Q) && + pv->port_idx == 0) { + /* vlan_put_tag expects skb->data to point to + * mac header. + */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_tci); + if (!skb) + goto out; + /* put skb->data back to where it was */ + skb_pull(skb, ETH_HLEN); + skb->vlan_tci = 0; + } + } + +out: + return skb; +} + +/* Called under RCU */ +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) + return true; + + /* If there are no vlan in the permitted list, all packets are + * rejected. + */ + if (!v) + return false; + + if (br_vlan_get_tag(skb, vid)) { + u16 pvid = br_get_pvid(v); + + /* Frame did not have a tag. See if pvid is set + * on this port. That tells us which vlan untagged + * traffic belongs to. + */ + if (pvid == VLAN_N_VID) + return false; + + /* PVID is set on this port. Any untagged ingress + * frame is considered to belong to this vlan. + */ + __vlan_hwaccel_put_tag(skb, pvid); + return true; + } + + /* Frame had a valid vlan tag. See if vlan is allowed */ + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + return true; + + if (!v) + return false; + + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Must be protected by RTNL */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + + pv->parent.br = br; + err = __vlan_add(pv, vid, flags); + if (err) + goto out; + + rcu_assign_pointer(br->vlan_info, pv); + return 0; +out: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. + */ + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); + } + + __vlan_del(pv, vid); + return 0; +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + if (br->vlan_enabled == val) + goto unlock; + + br->vlan_enabled = val; + +unlock: + rtnl_unlock(); + return 0; +} + +/* Must be protected by RTNL */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) { + err = -ENOMEM; + goto clean_up; + } + + pv->port_idx = port->port_no; + pv->parent.port = port; + err = __vlan_add(pv, vid, flags); + if (err) + goto clean_up; + + rcu_assign_pointer(port->vlan_info, pv); + return 0; + +clean_up: + kfree(pv); + return err; +} + +/* Must be protected by RTNL */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return -EINVAL; + + if (vid) { + /* If the VID !=0 remove fdb for this vid. VID 0 is special + * in that it's the default and is always there in the fdb. + */ + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); + } + + return __vlan_del(pv, vid); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(port->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3476ec46974..3bf43f7bb9d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -70,8 +70,7 @@ static void ulog_send(unsigned int nlgroup) { ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); if (!ub->skb) return; @@ -319,8 +318,7 @@ static void __exit ebt_ulog_fini(void) xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); spin_lock_bh(&ub->lock); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5fe2ff3b01e..8d493c91a56 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1472,16 +1472,17 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(sock_net(sk), user, len); + ret = update_counters(net, user, len); break; default: ret = -EINVAL; @@ -1494,14 +1495,15 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret; struct ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2279,16 +2281,17 @@ static int compat_do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case EBT_SO_SET_ENTRIES: - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = compat_update_counters(sock_net(sk), user, len); + ret = compat_update_counters(net, user, len); break; default: ret = -EINVAL; @@ -2302,8 +2305,9 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, int ret; struct compat_ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* try real handler in case userland supplied needed padding */ @@ -2314,7 +2318,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/can/Kconfig b/net/can/Kconfig index 03200699d27..a15c0e0d1fc 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -16,10 +16,11 @@ menuconfig CAN If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. +if CAN + config CAN_RAW tristate "Raw CAN Protocol (raw access with CAN-ID filtering)" - depends on CAN - default N + default y ---help--- The raw CAN protocol option offers access to the CAN bus via the BSD socket API. You probably want to use the raw socket in @@ -29,8 +30,7 @@ config CAN_RAW config CAN_BCM tristate "Broadcast Manager CAN Protocol (with content filtering)" - depends on CAN - default N + default y ---help--- The Broadcast Manager offers content filtering, timeout monitoring, sending of RTR frames, and cyclic CAN messages without permanent user @@ -42,8 +42,7 @@ config CAN_BCM config CAN_GW tristate "CAN Gateway/Router (with netlink configuration)" - depends on CAN - default N + default y ---help--- The CAN Gateway/Router is used to route (and modify) CAN frames. It is based on the PF_CAN core infrastructure for msg filtering and @@ -53,3 +52,5 @@ config CAN_GW by the netlink configuration interface known e.g. from iptables. source "drivers/net/can/Kconfig" + +endif diff --git a/net/can/bcm.c b/net/can/bcm.c index 969b7cdff59..5dcb20076f3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -54,6 +54,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <net/sock.h> @@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op) return; } - skb = alloc_skb(CFSIZ, gfp_any()); + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); /* send with loopback */ @@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!ifindex) return -ENODEV; - skb = alloc_skb(CFSIZ, GFP_KERNEL); - + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; + can_skb_reserve(skb); + err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { kfree_skb(skb); @@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } + can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ @@ -1627,7 +1633,7 @@ static void __exit bcm_module_exit(void) can_proto_unregister(&bcm_can_proto); if (proc_dir) - proc_net_remove(&init_net, "can-bcm"); + remove_proc_entry("can-bcm", init_net.proc_net); } module_init(bcm_module_init); diff --git a/net/can/gw.c b/net/can/gw.c index 574dda78eb0..c185fcd5e82 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -42,6 +42,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> @@ -52,19 +53,31 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/gw.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> -#define CAN_GW_VERSION "20101209" -static __initconst const char banner[] = - KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; +#define CAN_GW_VERSION "20130117" +#define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); -MODULE_ALIAS("can-gw"); +MODULE_ALIAS(CAN_GW_NAME); + +#define CGW_MIN_HOPS 1 +#define CGW_MAX_HOPS 6 +#define CGW_DEFAULT_HOPS 1 + +static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; +module_param(max_hops, uint, S_IRUGO); +MODULE_PARM_DESC(max_hops, + "maximum " CAN_GW_NAME " routing hops for CAN frames " + "(valid values: " __stringify(CGW_MIN_HOPS) "-" + __stringify(CGW_MAX_HOPS) " hops, " + "default: " __stringify(CGW_DEFAULT_HOPS) ")"); static HLIST_HEAD(cgw_list); static struct notifier_block notifier; @@ -118,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -338,15 +352,38 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct sk_buff *nskb; int modidx = 0; - /* do not handle already routed frames - see comment below */ - if (skb_mac_header_was_set(skb)) + /* + * Do not handle CAN frames routed more than 'max_hops' times. + * In general we should never catch this delimiter which is intended + * to cover a misconfiguration protection (e.g. circular CAN routes). + * + * The Controller Area Network controllers only accept CAN frames with + * correct CRCs - which are not visible in the controller registers. + * According to skbuff.h documentation the csum_start element for IP + * checksums is undefined/unsued when ip_summed == CHECKSUM_UNNECESSARY. + * Only CAN skbs can be processed here which already have this property. + */ + +#define cgw_hops(skb) ((skb)->csum_start) + + BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); + + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; return; } + /* is sending the skb back to the incoming interface not allowed? */ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) + return; + /* * clone the given skb, which has not been done in can_rcv() * @@ -363,15 +400,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } - /* - * Mark routed frames by setting some mac header length which is - * not relevant for the CAN frames located in the skb->data section. - * - * As dev->header_ops is not set in CAN netdevices no one is ever - * accessing the various header offsets in the CAN skbuffs anyway. - * E.g. using the packet socket to read CAN frames is still working. - */ - skb_set_mac_header(nskb, 8); + /* put the incremented hop counter in the cloned skb */ + cgw_hops(nskb) = cgw_hops(skb) + 1; nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ @@ -472,6 +502,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -771,6 +806,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; @@ -895,7 +931,11 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static __init int cgw_module_init(void) { - printk(banner); + /* sanitize given module parameter */ + max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); + + pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", + max_hops); cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); diff --git a/net/can/proc.c b/net/can/proc.c index ae566902d2b..49733589214 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -531,5 +531,5 @@ void can_remove_proc(void) can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); if (can_dir) - proc_net_remove(&init_net, "can"); + remove_proc_entry("can", init_net.proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 5b0e3e330d9..c1764e41dda 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -50,6 +50,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> #include <net/net_namespace.h> @@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!dev) return -ENXIO; - skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; diff --git a/net/core/Makefile b/net/core/Makefile index 674641b13ae..b33b996f5dd 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,10 +9,11 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o + sock_diag.o dev_ioctl.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o +obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index f64e439b4a0..17bc535115d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -97,8 +97,6 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> #include <linux/stat.h> #include <net/dst.h> #include <net/pkt_sched.h> @@ -106,12 +104,10 @@ #include <net/xfrm.h> #include <linux/highmem.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/module.h> #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> -#include <net/wext.h> #include <net/iw_handler.h> #include <asm/current.h> #include <linux/audit.h> @@ -132,9 +128,7 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> -#include <linux/net_tstamp.h> #include <linux/static_key.h> -#include <net/flow_keys.h> #include "net-sysfs.h" @@ -144,41 +138,10 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - static DEFINE_SPINLOCK(ptype_lock); static DEFINE_SPINLOCK(offload_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ +struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; /* @@ -1227,36 +1190,6 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); -/** - * dev_load - load a network module - * @net: the applicable net namespace - * @name: name of interface - * - * If a network interface is not present and the process has suitable - * privileges this function loads the module. If module loading is not - * available in this kernel then it becomes a nop. - */ - -void dev_load(struct net *net, const char *name) -{ - struct net_device *dev; - int no_module; - - rcu_read_lock(); - dev = dev_get_by_name_rcu(net, name); - rcu_read_unlock(); - - no_module = !dev; - if (no_module && capable(CAP_NET_ADMIN)) - no_module = request_module("netdev-%s", name); - if (no_module && capable(CAP_SYS_MODULE)) { - if (!request_module("%s", name)) - pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); - } -} -EXPORT_SYMBOL(dev_load); - static int __dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1267,6 +1200,14 @@ static int __dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + /* Block netpoll from trying to do any rx path servicing. + * If we don't do this there is a chance ndo_poll_controller + * or ndo_poll may be running while we open the device + */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); if (ret) @@ -1280,6 +1221,8 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); + netpoll_rx_enable(dev); + if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { @@ -1371,9 +1314,16 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); + /* Temporarily disable netpoll until the interface is down */ + retval = netpoll_rx_disable(dev); + if (retval) + return retval; + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); return retval; } @@ -1409,14 +1359,22 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { + int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); + /* Block netpoll rx while the interface is going down */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); } - return 0; + return ret; } EXPORT_SYMBOL(dev_close); @@ -1621,57 +1579,6 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static int net_hwtstamp_validate(struct ifreq *ifr) -{ - struct hwtstamp_config cfg; - enum hwtstamp_tx_types tx_type; - enum hwtstamp_rx_filters rx_filter; - int tx_type_valid = 0; - int rx_filter_valid = 0; - - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - - tx_type = cfg.tx_type; - rx_filter = cfg.rx_filter; - - switch (tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - case HWTSTAMP_TX_ONESTEP_SYNC: - tx_type_valid = 1; - break; - } - - switch (rx_filter) { - case HWTSTAMP_FILTER_NONE: - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - rx_filter_valid = 1; - break; - } - - if (!tx_type_valid || !rx_filter_valid) - return -ERANGE; - - return 0; -} - static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -1857,6 +1764,228 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +#ifdef CONFIG_XPS +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, + int cpu, u16 index) +{ + struct xps_map *map = NULL; + int pos; + + if (dev_maps) + map = xmap_dereference(dev_maps->cpu_map[cpu]); + + for (pos = 0; map && pos < map->len; pos++) { + if (map->queues[pos] == index) { + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; + } else { + RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); + kfree_rcu(map, rcu); + map = NULL; + } + break; + } + } + + return map; +} + +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ + struct xps_dev_maps *dev_maps; + int cpu, i; + bool active = false; + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (!dev_maps) + goto out_no_maps; + + for_each_possible_cpu(cpu) { + for (i = index; i < dev->num_tx_queues; i++) { + if (!remove_xps_queue(dev_maps, cpu, i)) + break; + } + if (i == dev->num_tx_queues) + active = true; + } + + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + + for (i = index; i < dev->num_tx_queues; i++) + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); + +out_no_maps: + mutex_unlock(&xps_map_mutex); +} + +static struct xps_map *expand_xps_map(struct xps_map *map, + int cpu, u16 index) +{ + struct xps_map *new_map; + int alloc_len = XPS_MIN_MAP_ALLOC; + int i, pos; + + for (pos = 0; map && pos < map->len; pos++) { + if (map->queues[pos] != index) + continue; + return map; + } + + /* Need to add queue to this CPU's existing map */ + if (map) { + if (pos < map->alloc_len) + return map; + + alloc_len = map->alloc_len * 2; + } + + /* Need to allocate new map to store queue on this CPU's map */ + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + return NULL; + + for (i = 0; i < pos; i++) + new_map->queues[i] = map->queues[i]; + new_map->alloc_len = alloc_len; + new_map->len = pos; + + return new_map; +} + +int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +{ + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_map *map, *new_map; + int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); + int cpu, numa_node_id = -2; + bool active = false; + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + /* allocate memory for queue storage */ + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, mask)) + continue; + + if (!new_dev_maps) + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) + return -ENOMEM; + + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + + map = expand_xps_map(map, cpu, index); + if (!map) + goto error; + + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + if (!new_dev_maps) + goto out_no_new_maps; + + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { + /* add queue to CPU maps */ + int pos = 0; + + map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + while ((pos < map->len) && (map->queues[pos] != index)) + pos++; + + if (pos == map->len) + map->queues[map->len++] = index; +#ifdef CONFIG_NUMA + if (numa_node_id == -2) + numa_node_id = cpu_to_node(cpu); + else if (numa_node_id != cpu_to_node(cpu)) + numa_node_id = -1; +#endif + } else if (dev_maps) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[cpu]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + } + + } + + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + + /* Cleanup old maps */ + if (dev_maps) { + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (map && map != new_map) + kfree_rcu(map, rcu); + } + + kfree_rcu(dev_maps, rcu); + } + + dev_maps = new_dev_maps; + active = true; + +out_no_new_maps: + /* update Tx queue numa node */ + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), + (numa_node_id >= 0) ? numa_node_id : + NUMA_NO_NODE); + + if (!dev_maps) + goto out_no_maps; + + /* removes queue from unused CPUs */ + for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) + continue; + + if (remove_xps_queue(dev_maps, cpu, index)) + active = true; + } + + /* free map if not active */ + if (!active) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + +out_no_maps: + mutex_unlock(&xps_map_mutex); + + return 0; +error: + /* remove any maps that we added */ + for_each_possible_cpu(cpu) { + new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } + + mutex_unlock(&xps_map_mutex); + + kfree(new_dev_maps); + return -ENOMEM; +} +EXPORT_SYMBOL(netif_set_xps_queue); + +#endif /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -1880,8 +2009,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) + if (txq < dev->real_num_tx_queues) { qdisc_reset_all_tx_gt(dev, txq); +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, txq); +#endif + } } dev->real_num_tx_queues = txq; @@ -2046,6 +2179,15 @@ int skb_checksum_help(struct sk_buff *skb) return -EINVAL; } + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. + */ + if (skb_has_shared_frag(skb)) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -2069,25 +2211,19 @@ out: EXPORT_SYMBOL(skb_checksum_help); /** - * skb_gso_segment - Perform segmentation on skb. + * skb_mac_gso_segment - mac layer segmentation handler. * @skb: buffer to segment * @features: features for the output path (see dev->features) - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; - int err; while (type == htons(ETH_P_8021Q)) { + int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) @@ -2098,22 +2234,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - skb_warn_bad_offload(skb); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2131,7 +2259,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, return segs; } -EXPORT_SYMBOL(skb_gso_segment); +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG @@ -2410,126 +2581,28 @@ out: return rc; } -static u32 hashrnd __read_mostly; - -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, - unsigned int num_tx_queues) +static void qdisc_pkt_len_init(struct sk_buff *skb) { - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; + const struct skb_shared_info *shinfo = skb_shinfo(skb); - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = jhash_1word(hash, hashrnd); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - -static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) -{ - if (unlikely(queue_index >= dev->real_num_tx_queues)) { - net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - return 0; - } - return queue_index; -} - -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) -{ -#ifdef CONFIG_XPS - struct xps_dev_maps *dev_maps; - struct xps_map *map; - int queue_index = -1; - - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); - if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->rxhash; - hash = jhash_1word(hash, hashrnd); - queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; - } - } - rcu_read_unlock(); - - return queue_index; -#else - return -1; -#endif -} - -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb) -{ - int queue_index; - const struct net_device_ops *ops = dev->netdev_ops; - - if (dev->real_num_tx_queues == 1) - queue_index = 0; - else if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { - struct sock *sk = skb->sk; - queue_index = sk_tx_queue_get(sk); - - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int old_index = queue_index; + qdisc_skb_cb(skb)->pkt_len = skb->len; - queue_index = get_xps_queue(dev, skb); - if (queue_index < 0) - queue_index = skb_tx_hash(dev, skb); + /* To get more precise estimation of bytes sent on wire, + * we add to pkt_len the headers size of all segments + */ + if (shinfo->gso_size) { + unsigned int hdr_len; - if (queue_index != old_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); + /* mac layer + network layer */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - } - } + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; } - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); } static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, @@ -2540,7 +2613,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -2663,6 +2736,8 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + skb_reset_mac_header(skb); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -2757,41 +2832,6 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -/* - * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb - * if hash is a canonical 4-tuple hash over transport ports. - */ -void __skb_get_rxhash(struct sk_buff *skb) -{ - struct flow_keys keys; - u32 hash; - - if (!skb_flow_dissect(skb, &keys)) - return; - - if (keys.ports) - skb->l4_rxhash = 1; - - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, hashrnd); - if (!hash) - hash = 1; - - skb->rxhash = hash; -} -EXPORT_SYMBOL(__skb_get_rxhash); - #ifdef CONFIG_RPS /* One global table that all flow-based protocols share. */ @@ -3318,7 +3358,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } -static int __netif_receive_skb(struct sk_buff *skb) +static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; @@ -3327,24 +3367,11 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; - unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); - /* - * PFMEMALLOC skbs are special, they should - * - be delivered to SOCK_MEMALLOC sockets only - * - stay away from userspace - * - have bounded memory usage - * - * Use PF_MEMALLOC as this saves us from propagating the allocation - * context down to all allocation sites. - */ - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) - current->flags |= PF_MEMALLOC; - /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) goto out; @@ -3352,7 +3379,8 @@ static int __netif_receive_skb(struct sk_buff *skb) orig_dev = skb->dev; skb_reset_network_header(skb); - skb_reset_transport_header(skb); + if (!skb_transport_header_was_set(skb)) + skb_reset_transport_header(skb); skb_reset_mac_len(skb); pt_prev = NULL; @@ -3377,7 +3405,7 @@ another_round: } #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + if (pfmemalloc) goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -3396,8 +3424,7 @@ skip_taps: ncls: #endif - if (sk_memalloc_socks() && skb_pfmemalloc(skb) - && !skb_pfmemalloc_protocol(skb)) + if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; if (vlan_tx_tag_present(skb)) { @@ -3467,7 +3494,31 @@ drop: unlock: rcu_read_unlock(); out: - tsk_restore_flags(current, pflags, PF_MEMALLOC); + return ret; +} + +static int __netif_receive_skb(struct sk_buff *skb) +{ + int ret; + + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { + unsigned long pflags = current->flags; + + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + current->flags |= PF_MEMALLOC; + ret = __netif_receive_skb_core(skb, true); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + } else + ret = __netif_receive_skb_core(skb, false); + return ret; } @@ -3634,7 +3685,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff __be16 type = skb->protocol; struct list_head *head = &offload_base; int same_flow; - int mac_len; enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) @@ -3651,8 +3701,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff continue; skb_set_network_header(skb, skb_gro_offset(skb)); - mac_len = skb->network_header - skb->mac_header; - skb->mac_len = mac_len; + skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; @@ -4134,530 +4183,231 @@ softnet_break: goto out; } -static gifconf_func_t *gifconf_list[NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t *gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} -EXPORT_SYMBOL(register_gifconf); - - -/* - * Map an interface index to its name (SIOCGIFNAME) - */ - -/* - * We need this ioctl for efficient implementation of the - * if_indextoname() function required by the IPv6 API. Without - * it, we would have to search all the interfaces to find a - * match. --pb - */ - -static int dev_ifname(struct net *net, struct ifreq __user *arg) -{ +struct netdev_upper { struct net_device *dev; - struct ifreq ifr; - unsigned seq; - - /* - * Fetch the caller's info block. - */ + bool master; + struct list_head list; + struct rcu_head rcu; + struct list_head search_list; +}; - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; +static void __append_search_uppers(struct list_head *search_list, + struct net_device *dev) +{ + struct netdev_upper *upper; -retry: - seq = read_seqcount_begin(&devnet_rename_seq); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); - if (!dev) { - rcu_read_unlock(); - return -ENODEV; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + /* check if this upper is not already in search list */ + if (list_empty(&upper->search_list)) + list_add_tail(&upper->search_list, search_list); } - - strcpy(ifr.ifr_name, dev->name); - rcu_read_unlock(); - if (read_seqcount_retry(&devnet_rename_seq, seq)) - goto retry; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; } -/* - * Perform a SIOCGIFCONF call. This structure will change - * size eventually, and there is nothing I can do about it. - * Thus we will need a 'compatibility mode'. - */ - -static int dev_ifconf(struct net *net, char __user *arg) +static bool __netdev_search_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - struct ifconf ifc; - struct net_device *dev; - char __user *pos; - int len; - int total; - int i; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; + LIST_HEAD(search_list); + struct netdev_upper *upper; + struct netdev_upper *tmp; + bool ret = false; - /* - * Loop over the interfaces, and write an info block for each. - */ - - total = 0; - for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0); - else - done = gifconf_list[i](dev, pos + total, - len - total); - if (done < 0) - return -EFAULT; - total += done; - } + __append_search_uppers(&search_list, dev); + list_for_each_entry(upper, &search_list, search_list) { + if (upper->dev == upper_dev) { + ret = true; + break; } + __append_search_uppers(&search_list, upper->dev); } - - /* - * All done. Write the updated control block back to the caller. - */ - ifc.ifc_len = total; - - /* - * Both BSD and Solaris return 0 here, so we do too. - */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + list_for_each_entry_safe(upper, tmp, &search_list, search_list) + INIT_LIST_HEAD(&upper->search_list); + return ret; } -#ifdef CONFIG_PROC_FS - -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +static struct netdev_upper *__netdev_find_upper(struct net_device *dev, + struct net_device *upper_dev) { - struct net *net = seq_file_net(seq); - struct net_device *dev; - struct hlist_node *p; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); + struct netdev_upper *upper; - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (++count == offset) - return dev; + list_for_each_entry(upper, &dev->upper_dev_list, list) { + if (upper->dev == upper_dev) + return upper; } - return NULL; } -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - - return NULL; -} - -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. +/** + * netdev_has_upper_dev - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks only immediate upper device, + * not through a complete stack of devices. The caller must hold the RTNL lock. */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +bool netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) { - rcu_read_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return dev_from_bucket(seq, pos); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + ASSERT_RTNL(); - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); + return __netdev_find_upper(dev, upper_dev); } +EXPORT_SYMBOL(netdev_has_upper_dev); -/* - * Called from the PROCfs module. This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev +/** + * netdev_has_any_upper_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to an upper device and return true in case + * it is. The caller must hold the RTNL lock. */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct softnet_data *softnet_get_online(loff_t *pos) -{ - struct softnet_data *sd = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); - break; - } else - ++*pos; - return sd; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +bool netdev_has_any_upper_dev(struct net_device *dev) { - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct softnet_data *sd = v; + ASSERT_RTNL(); - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); - return 0; + return !list_empty(&dev->upper_dev_list); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) +/** + * netdev_master_upper_dev_get - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RTNL lock. + */ +struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} + struct netdev_upper *upper; -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; + ASSERT_RTNL(); -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; + if (list_empty(&dev->upper_dev_list)) + return NULL; -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); + upper = list_first_entry(&dev->upper_dev_list, + struct netdev_upper, list); + if (likely(upper->master)) + return upper->dev; + return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get); -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) +/** + * netdev_master_upper_dev_get_rcu - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RCU read lock. + */ +struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { - struct packet_type *pt = NULL; - loff_t i = 0; - int t; + struct netdev_upper *upper; - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } + upper = list_first_or_null_rcu(&dev->upper_dev_list, + struct netdev_upper, list); + if (upper && likely(upper->master)) + return upper->dev; return NULL; } +EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +static int __netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev, bool master) { - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} + struct netdev_upper *upper; -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; + ASSERT_RTNL(); - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + if (dev == upper_dev) + return -EBUSY; - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; + /* To prevent loops, check if dev is not upper device to upper_dev. */ + if (__netdev_search_upper_dev(upper_dev, dev)) + return -EBUSY; - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} + if (__netdev_find_upper(dev, upper_dev)) + return -EEXIST; -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} + if (master && netdev_master_upper_dev_get(dev)) + return -EBUSY; -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; + upper = kmalloc(sizeof(*upper), GFP_KERNEL); + if (!upper) + return -ENOMEM; - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); + upper->dev = upper_dev; + upper->master = master; + INIT_LIST_HEAD(&upper->search_list); - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } + /* Ensure that master upper link is always the first item in list. */ + if (master) + list_add_rcu(&upper->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&upper->list, &dev->upper_dev_list); + dev_hold(upper_dev); return 0; } -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) - goto out; - if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) - goto out_dev; - if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - proc_net_remove(net, "ptype"); -out_softnet: - proc_net_remove(net, "softnet_stat"); -out_dev: - proc_net_remove(net, "dev"); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - proc_net_remove(net, "ptype"); - proc_net_remove(net, "softnet_stat"); - proc_net_remove(net, "dev"); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) +/** + * netdev_upper_dev_link - Add a link to the upper device + * @dev: device + * @upper_dev: new upper device + * + * Adds a link to device which is upper to this one. The caller must hold + * the RTNL lock. On a failure a negative errno code is returned. + * On success the reference counts are adjusted and the function + * returns zero. + */ +int netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - return register_pernet_subsys(&dev_proc_ops); + return __netdev_upper_dev_link(dev, upper_dev, false); } -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - +EXPORT_SYMBOL(netdev_upper_dev_link); /** - * netdev_set_master - set up master pointer - * @slave: slave device - * @master: new master device + * netdev_master_upper_dev_link - Add a master link to the upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted and the function returns zero. + * Adds a link to device which is upper to this one. In this case, only + * one master upper device can be linked, although other non-master devices + * might be linked as well. The caller must hold the RTNL lock. + * On a failure a negative errno code is returned. On success the reference + * counts are adjusted and the function returns zero. */ -int netdev_set_master(struct net_device *slave, struct net_device *master) +int netdev_master_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - struct net_device *old = slave->master; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - dev_hold(master); - } - - slave->master = master; - - if (old) - dev_put(old); - return 0; + return __netdev_upper_dev_link(dev, upper_dev, true); } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_master_upper_dev_link); /** - * netdev_set_bond_master - set up bonding master/slave pair - * @slave: slave device - * @master: new master device + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success %RTM_NEWLINK is sent - * to the routing socket and the function returns zero. + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. */ -int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { - int err; + struct netdev_upper *upper; ASSERT_RTNL(); - err = netdev_set_master(slave, master); - if (err) - return err; - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; + upper = __netdev_find_upper(dev, upper_dev); + if (!upper) + return; + list_del_rcu(&upper->list); + dev_put(upper_dev); + kfree_rcu(upper, rcu); } -EXPORT_SYMBOL(netdev_set_bond_master); +EXPORT_SYMBOL(netdev_upper_dev_unlink); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -5020,381 +4770,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) if (!netif_device_present(dev)) return -ENODEV; err = ops->ndo_set_mac_address(dev, sa); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + if (err) + return err; + dev->addr_assign_type = NET_ADDR_SET; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); - return err; + return 0; } EXPORT_SYMBOL(dev_set_mac_address); -/* - * Perform the SIOCxIFxxx calls, inside rcu_read_lock() - */ -static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); - - if (!dev) - return -ENODEV; - - switch (cmd) { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (short) dev_get_flags(dev); - return 0; - - case SIOCGIFMETRIC: /* Get the metric on the interface - (currently unused) */ - ifr->ifr_metric = 0; - return 0; - - case SIOCGIFMTU: /* Get the MTU of a device */ - ifr->ifr_mtu = dev->mtu; - return 0; - - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - - case SIOCGIFSLAVE: - err = -EINVAL; - break; - - case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; - - case SIOCGIFINDEX: - ifr->ifr_ifindex = dev->ifindex; - return 0; - - case SIOCGIFTXQLEN: - ifr->ifr_qlen = dev->tx_queue_len; - return 0; - - default: - /* dev_ioctl() should ensure this case - * is never reached - */ - WARN_ON(1); - err = -ENOTTY; - break; - - } - return err; -} - -/* - * Perform the SIOCxIFxxx calls, inside rtnl_lock() - */ -static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) -{ - int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - const struct net_device_ops *ops; - - if (!dev) - return -ENODEV; - - ops = dev->netdev_ops; - - switch (cmd) { - case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); - - case SIOCSIFMETRIC: /* Set the metric on the interface - (currently unused) */ - return -EOPNOTSUPP; - - case SIOCSIFMTU: /* Set the MTU of a device */ - return dev_set_mtu(dev, ifr->ifr_mtu); - - case SIOCSIFHWADDR: - return dev_set_mac_address(dev, &ifr->ifr_hwaddr); - - case SIOCSIFHWBROADCAST: - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return 0; - - case SIOCSIFMAP: - if (ops->ndo_set_config) { - if (!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; - - case SIOCADDMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCDELMULTI: - if (!ops->ndo_set_rx_mode || - ifr->ifr_hwaddr.sa_family != AF_UNSPEC) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); - - case SIOCSIFTXQLEN: - if (ifr->ifr_qlen < 0) - return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; - return 0; - - case SIOCSIFNAME: - ifr->ifr_newname[IFNAMSIZ-1] = '\0'; - return dev_change_name(dev, ifr->ifr_newname); - - case SIOCSHWTSTAMP: - err = net_hwtstamp_validate(ifr); - if (err) - return err; - /* fall through */ - - /* - * Unknown or private ioctl - */ - default: - if ((cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) || - cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE || - cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCBRADDIF || - cmd == SIOCBRDELIF || - cmd == SIOCSHWTSTAMP || - cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } - } else - err = -EINVAL; - - } - return err; -} - -/* - * This function handles all "interface"-type I/O control requests. The actual - * 'doing' part of this is dev_ifsioc above. - */ - /** - * dev_ioctl - network device ioctl - * @net: the applicable net namespace - * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space + * dev_change_carrier - Change device carrier + * @dev: device + * @new_carries: new value * - * Issue ioctl functions to devices. This is normally called by the - * user space syscall interfaces but can sometimes be useful for - * other purposes. The return value is the return from the syscall if - * positive or a negative errno code on error. + * Change device carrier */ - -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_change_carrier(struct net_device *dev, bool new_carrier) { - struct ifreq ifr; - int ret; - char *colon; - - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } - if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - colon = strchr(ifr.ifr_name, ':'); - if (colon) - *colon = 0; - - /* - * See which interface the caller is talking about. - */ - - switch (cmd) { - /* - * These ioctl calls: - * - can be done by all. - * - atomic and do not require locking. - * - return a value - */ - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFHWADDR: - case SIOCGIFSLAVE: - case SIOCGIFMAP: - case SIOCGIFINDEX: - case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); - rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); - rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ethtool(net, &ifr); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - return a value - */ - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSIFNAME: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } - return ret; - - /* - * These ioctl calls: - * - require superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFMAP: - case SIOCSIFTXQLEN: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - /* - * These ioctl calls: - * - require local superuser power. - * - require strict serialization. - * - do not return a value - */ - case SIOCSIFFLAGS: - case SIOCSIFMETRIC: - case SIOCSIFMTU: - case SIOCSIFHWADDR: - case SIOCSIFSLAVE: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCSIFHWBROADCAST: - case SIOCSMIIREG: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: - case SIOCSHWTSTAMP: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* fall through */ - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - return ret; - - case SIOCGIFMEM: - /* Get the per device memory space. We can add this but - * currently do not support it */ - case SIOCSIFMEM: - /* Set the per device memory buffer space. - * Not applicable in our case */ - case SIOCSIFLINK: - return -ENOTTY; + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Unknown or private ioctl. - */ - default: - if (cmd == SIOCWANDEV || - (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); - rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); - rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); - return -ENOTTY; - } + if (!ops->ndo_change_carrier) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_carrier(dev, new_carrier); } - +EXPORT_SYMBOL(dev_change_carrier); /** * dev_new_index - allocate an ifindex @@ -5482,11 +4884,15 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif } synchronize_net(); @@ -5664,10 +5070,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) { - pr_err("netdev: Unable to allocate %u rx queues\n", count); + if (!rx) return -ENOMEM; - } + dev->_rx = rx; for (i = 0; i < count; i++) @@ -5698,10 +5103,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev) BUG_ON(count < 1); tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) { - pr_err("netdev: Unable to allocate %u tx queues\n", count); + if (!tx) return -ENOMEM; - } + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5760,6 +5164,14 @@ int register_netdevice(struct net_device *dev) } } + if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && + (!dev->netdev_ops->ndo_vlan_rx_add_vid || + !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { + netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); + ret = -EINVAL; + goto err_uninit; + } + ret = -EBUSY; if (!dev->ifindex) dev->ifindex = dev_new_index(net); @@ -5815,6 +5227,13 @@ int register_netdevice(struct net_device *dev) list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); + /* If the device has permanent device address, driver should + * set dev_addr and also addr_assign_type should be set to + * NET_ADDR_PERM (default value). + */ + if (dev->addr_assign_type == NET_ADDR_PERM) + memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); + /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); @@ -6173,10 +5592,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); - if (!p) { - pr_err("alloc_netdev: Unable to allocate device\n"); + if (!p) return NULL; - } dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; @@ -6199,6 +5616,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); + INIT_LIST_HEAD(&dev->upper_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); @@ -6842,19 +6260,9 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); - dev_mcast_init(); rc = 0; out: return rc; } subsys_initcall(net_dev_init); - -static int __init initialize_hashrnd(void) -{ - get_random_bytes(&hashrnd, sizeof(hashrnd)); - return 0; -} - -late_initcall_sync(initialize_hashrnd); - diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index b079c7bbc15..bd2eb9d3e36 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -15,7 +15,6 @@ #include <linux/rtnetlink.h> #include <linux/export.h> #include <linux/list.h> -#include <linux/proc_fs.h> /* * General list handling functions @@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); - -#ifdef CONFIG_PROC_FS -#include <linux/seq_file.h> - -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct netdev_hw_addr *ha; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); - } - netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - proc_net_remove(net, "dev_mcast"); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c new file mode 100644 index 00000000000..6cc0481faad --- /dev/null +++ b/net/core/dev_ioctl.c @@ -0,0 +1,576 @@ +#include <linux/kmod.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/net_tstamp.h> +#include <linux/wireless.h> +#include <net/wext.h> + +/* + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. Without + * it, we would have to search all the interfaces to find a + * match. --pb + */ + +static int dev_ifname(struct net *net, struct ifreq __user *arg) +{ + struct net_device *dev; + struct ifreq ifr; + unsigned seq; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + +retry: + seq = read_seqcount_begin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + + strcpy(ifr.ifr_name, dev->name); + rcu_read_unlock(); + if (read_seqcount_retry(&devnet_rename_seq, seq)) + goto retry; + + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; +} + +static gifconf_func_t *gifconf_list[NPROTO]; + +/** + * register_gifconf - register a SIOCGIF handler + * @family: Address family + * @gifconf: Function handler + * + * Register protocol dependent address dumping routines. The handler + * that is passed must not be freed or reused until it has been replaced + * by another handler. + */ +int register_gifconf(unsigned int family, gifconf_func_t *gifconf) +{ + if (family >= NPROTO) + return -EINVAL; + gifconf_list[family] = gifconf; + return 0; +} +EXPORT_SYMBOL(register_gifconf); + +/* + * Perform a SIOCGIFCONF call. This structure will change + * size eventually, and there is nothing I can do about it. + * Thus we will need a 'compatibility mode'. + */ + +static int dev_ifconf(struct net *net, char __user *arg) +{ + struct ifconf ifc; + struct net_device *dev; + char __user *pos; + int len; + int total; + int i; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) + return -EFAULT; + + pos = ifc.ifc_buf; + len = ifc.ifc_len; + + /* + * Loop over the interfaces, and write an info block for each. + */ + + total = 0; + for_each_netdev(net, dev) { + for (i = 0; i < NPROTO; i++) { + if (gifconf_list[i]) { + int done; + if (!pos) + done = gifconf_list[i](dev, NULL, 0); + else + done = gifconf_list[i](dev, pos + total, + len - total); + if (done < 0) + return -EFAULT; + total += done; + } + } + } + + /* + * All done. Write the updated control block back to the caller. + */ + ifc.ifc_len = total; + + /* + * Both BSD and Solaris return 0 here, so we do too. + */ + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rcu_read_lock() + */ +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; + + switch (cmd) { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (short) dev_get_flags(dev); + return 0; + + case SIOCGIFMETRIC: /* Get the metric on the interface + (currently unused) */ + ifr->ifr_metric = 0; + return 0; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; + + case SIOCGIFHWADDR: + if (!dev->addr_len) + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + else + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; + return 0; + + case SIOCGIFSLAVE: + err = -EINVAL; + break; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start = dev->mem_start; + ifr->ifr_map.mem_end = dev->mem_end; + ifr->ifr_map.base_addr = dev->base_addr; + ifr->ifr_map.irq = dev->irq; + ifr->ifr_map.dma = dev->dma; + ifr->ifr_map.port = dev->if_port; + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + default: + /* dev_ioctl() should ensure this case + * is never reached + */ + WARN_ON(1); + err = -ENOTTY; + break; + + } + return err; +} + +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + +/* + * Perform the SIOCxIFxxx calls, inside rtnl_lock() + */ +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) +{ + int err; + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops; + + if (!dev) + return -ENODEV; + + ops = dev->netdev_ops; + + switch (cmd) { + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCSIFMETRIC: /* Set the metric on the interface + (currently unused) */ + return -EOPNOTSUPP; + + case SIOCSIFMTU: /* Set the MTU of a device */ + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCSIFHWADDR: + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family != dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCSIFMAP: + if (ops->ndo_set_config) { + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_set_config(dev, &ifr->ifr_map); + } + return -EOPNOTSUPP; + + case SIOCADDMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCDELMULTI: + if (!ops->ndo_set_rx_mode || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + + case SIOCSIFTXQLEN: + if (ifr->ifr_qlen < 0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + + case SIOCSIFNAME: + ifr->ifr_newname[IFNAMSIZ-1] = '\0'; + return dev_change_name(dev, ifr->ifr_newname); + + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + + /* + * Unknown or private ioctl + */ + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG || + cmd == SIOCBRADDIF || + cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || + cmd == SIOCWANDEV) { + err = -EOPNOTSUPP; + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + } else + err = -EINVAL; + + } + return err; +} + +/** + * dev_load - load a network module + * @net: the applicable net namespace + * @name: name of interface + * + * If a network interface is not present and the process has suitable + * privileges this function loads the module. If module loading is not + * available in this kernel then it becomes a nop. + */ + +void dev_load(struct net *net, const char *name) +{ + struct net_device *dev; + int no_module; + + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); + + no_module = !dev; + if (no_module && capable(CAP_NET_ADMIN)) + no_module = request_module("netdev-%s", name); + if (no_module && capable(CAP_SYS_MODULE)) { + if (!request_module("%s", name)) + pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); + } +} +EXPORT_SYMBOL(dev_load); + +/* + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. + */ + +/** + * dev_ioctl - network device ioctl + * @net: the applicable net namespace + * @cmd: command to issue + * @arg: pointer to a struct ifreq in user space + * + * Issue ioctl functions to devices. This is normally called by the + * user space syscall interfaces but can sometimes be useful for + * other purposes. The return value is the return from the syscall if + * positive or a negative errno code on error. + */ + +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +{ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ + + if (cmd == SIOCGIFCONF) { + rtnl_lock(); + ret = dev_ifconf(net, (char __user *) arg); + rtnl_unlock(); + return ret; + } + if (cmd == SIOCGIFNAME) + return dev_ifname(net, (struct ifreq __user *)arg); + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + + /* + * See which interface the caller is talking about. + */ + + switch (cmd) { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(net, ifr.ifr_name); + rcu_read_lock(); + ret = dev_ifsioc_locked(net, &ifr, cmd); + rcu_read_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + case SIOCETHTOOL: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ethtool(net, &ifr); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSIFNAME: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. + * - do not return a value + */ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCBRADDIF: + case SIOCBRDELIF: + case SIOCSHWTSTAMP: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but + * currently do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. + * Not applicable in our case */ + case SIOCSIFLINK: + return -ENOTTY; + + /* + * Unknown or private ioctl. + */ + default: + if (cmd == SIOCWANDEV || + (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) + ret = -EFAULT; + return ret; + } + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + return wext_handle_ioctl(net, &ifr, cmd, arg); + return -ENOTTY; + } +} diff --git a/net/core/dst.c b/net/core/dst.c index ee6153e2cf4..35fd12f1a69 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -179,6 +179,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; + dst->from = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a8705432e4b..3e9b2c3e30f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -175,7 +176,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_FEATURES) return ARRAY_SIZE(netdev_features_strings); - if (ops && ops->get_sset_count && ops->get_strings) + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else return -EOPNOTSUPP; @@ -311,7 +312,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { ASSERT_RTNL(); - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops->get_settings) return -EOPNOTSUPP; memset(cmd, 0, sizeof(struct ethtool_cmd)); @@ -355,7 +356,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GDRVINFO; - if (ops && ops->get_drvinfo) { + if (ops->get_drvinfo) { ops->get_drvinfo(dev, &info); } else if (dev->dev.parent && dev->dev.parent->driver) { strlcpy(info.bus_info, dev_name(dev->dev.parent), @@ -370,7 +371,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, * this method of obtaining string set info is deprecated; * Use ETHTOOL_GSSET_INFO instead. */ - if (ops && ops->get_sset_count) { + if (ops->get_sset_count) { int rc; rc = ops->get_sset_count(dev, ETH_SS_TEST); @@ -383,9 +384,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops && ops->get_regs_len) + if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); - if (ops && ops->get_eeprom_len) + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); if (copy_to_user(useraddr, &info, sizeof(info))) @@ -590,13 +591,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, struct ethtool_rxnfc rx_rings; u32 user_size, dev_size, i; u32 *indir; + const struct ethtool_ops *ops = dev->ethtool_ops; int ret; - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->set_rxfh_indir || - !dev->ethtool_ops->get_rxnfc) + if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + !ops->get_rxnfc) return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + + dev_size = ops->get_rxfh_indir_size(dev); if (dev_size == 0) return -EOPNOTSUPP; @@ -613,7 +615,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, return -ENOMEM; rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); + ret = ops->get_rxnfc(dev, &rx_rings, NULL); if (ret) goto out; @@ -639,7 +641,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } } - ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh_indir(dev, indir); out: kfree(indir); @@ -1082,9 +1084,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; static bool busy; + const struct ethtool_ops *ops = dev->ethtool_ops; int rc; - if (!dev->ethtool_ops->set_phys_id) + if (!ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1093,7 +1096,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; @@ -1118,7 +1121,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) i = n; do { rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, + rc = ops->set_phys_id(dev, (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); rtnl_unlock(); if (rc) @@ -1133,7 +1136,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) dev_put(dev); busy = false; - (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); return rc; } @@ -1275,7 +1278,7 @@ static int ethtool_get_dump_flag(struct net_device *dev, struct ethtool_dump dump; const struct ethtool_ops *ops = dev->ethtool_ops; - if (!dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1299,8 +1302,7 @@ static int ethtool_get_dump_data(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; void *data = NULL; - if (!dev->ethtool_ops->get_dump_data || - !dev->ethtool_ops->get_dump_flag) + if (!ops->get_dump_data || !ops->get_dump_flag) return -EOPNOTSUPP; if (copy_from_user(&dump, useraddr, sizeof(dump))) @@ -1346,13 +1348,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) info.cmd = ETHTOOL_GET_TS_INFO; if (phydev && phydev->drv && phydev->drv->ts_info) { - err = phydev->drv->ts_info(phydev, &info); - - } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) { - + } else if (ops->get_ts_info) { err = ops->get_ts_info(dev, &info); - } else { info.so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/net/core/filter.c b/net/core/filter.c index c23543cba13..2e20b55a783 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -532,6 +532,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, }; int pc; + bool anc_found; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; @@ -592,8 +593,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: case BPF_S_LD_B_ABS: + anc_found = false; #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ code = BPF_S_ANC_##CODE; \ + anc_found = true; \ break switch (ftest->k) { ANCILLARY(PROTOCOL); @@ -610,6 +613,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); } + + /* ancillary operation unknown or unsupported */ + if (anc_found == false && ftest->k >= SKF_AD_OFF) + return -EINVAL; } ftest->code = code; } @@ -714,6 +721,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; @@ -750,6 +760,9 @@ int sk_detach_filter(struct sock *sk) int ret = -ENOENT; struct sk_filter *filter; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { diff --git a/net/core/flow.c b/net/core/flow.c index b0901ee5a00..43f7495df27 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -286,7 +286,7 @@ nocache: else fle->genid--; } else { - if (flo && !IS_ERR(flo)) + if (!IS_ERR_OR_NULL(flo)) flo->ops->delete(flo); } ret_object: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 466820b6e34..9d4c7201400 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -143,3 +143,176 @@ ipv6: return true; } EXPORT_SYMBOL(skb_flow_dissect); + +static u32 hashrnd __read_mostly; + +/* + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. + */ +void __skb_get_rxhash(struct sk_buff *skb) +{ + struct flow_keys keys; + u32 hash; + + if (!skb_flow_dissect(skb, &keys)) + return; + + if (keys.ports) + skb->l4_rxhash = 1; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { + swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } + + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, hashrnd); + if (!hash) + hash = 1; + + skb->rxhash = hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol; + hash = jhash_1word(hash, hashrnd); + + return (u16) (((u64) hash * qcount) >> 32) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + return 0; + } + return queue_index; +} + +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + +u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); + + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); + + if (queue_index != new_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + + } + + queue_index = new_index; + } + + return queue_index; +} +EXPORT_SYMBOL(__netdev_pick_tx); + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + int queue_index = 0; + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); + else + queue_index = __netdev_pick_tx(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + +static int __init initialize_hashrnd(void) +{ + get_random_bytes(&hashrnd, sizeof(hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c815f285e5a..3863b8f639c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device goto out_entries; } - if (tbl->entry_size) - n = kzalloc(tbl->entry_size, GFP_ATOMIC); - else { - int sz = sizeof(*n) + tbl->key_len; - - sz = ALIGN(sz, NEIGH_PRIV_ALIGN); - sz += dev->neigh_priv_len; - n = kzalloc(sz, GFP_ATOMIC); - } + n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; @@ -778,6 +770,9 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + /* * periodically recompute ReachableTime from random function */ @@ -832,6 +827,7 @@ next_elt: nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } +out: /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 * base_reachable_time. @@ -1542,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); + if (!tbl->entry_size) + tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + + tbl->key_len, NEIGH_PRIV_ALIGN); + else + WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); + rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c new file mode 100644 index 00000000000..0f6bb6f8d39 --- /dev/null +++ b/net/core/net-procfs.c @@ -0,0 +1,412 @@ +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/wext.h> + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count = 0, offset = get_offset(*pos); + + h = &net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (++count == offset) + return dev; + } + + return NULL; +} + +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net_device *dev; + unsigned int bucket; + + do { + dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ +static void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + + return dev_from_bucket(seq, pos); +} + +static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return dev_from_bucket(seq, pos); +} + +static void dev_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) +{ + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); +} + +/* + * Called from the PROCfs module. This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev + */ +static int dev_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); + else + dev_seq_printf_stats(seq, v); + return 0; +} + +static struct softnet_data *softnet_get_online(loff_t *pos) +{ + struct softnet_data *sd = NULL; + + while (*pos < nr_cpu_ids) + if (cpu_online(*pos)) { + sd = &per_cpu(softnet_data, *pos); + break; + } else + ++*pos; + return sd; +} + +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) +{ + return softnet_get_online(pos); +} + +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return softnet_get_online(pos); +} + +static void softnet_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int softnet_seq_show(struct seq_file *seq, void *v) +{ + struct softnet_data *sd = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + sd->processed, sd->dropped, sd->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + sd->cpu_collision, sd->received_rps); + return 0; +} + +static const struct seq_operations dev_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_seq_show, +}; + +static int dev_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_seq_fops = { + .owner = THIS_MODULE, + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static const struct seq_operations softnet_seq_ops = { + .start = softnet_seq_start, + .next = softnet_seq_next, + .stop = softnet_seq_stop, + .show = softnet_seq_show, +}; + +static int softnet_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &softnet_seq_ops); +} + +static const struct file_operations softnet_seq_fops = { + .owner = THIS_MODULE, + .open = softnet_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < PTYPE_HASH_SIZE; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & PTYPE_HASH_MASK; + + while (nxt == &ptype_base[hash]) { + if (++hash >= PTYPE_HASH_SIZE) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + + +static int __net_init dev_proc_net_init(struct net *net) +{ + int rc = -ENOMEM; + + if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + goto out; + if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + &softnet_seq_fops)) + goto out_dev; + if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; + rc = 0; +out: + return rc; +out_ptype: + remove_proc_entry("ptype", net->proc_net); +out_softnet: + remove_proc_entry("softnet_stat", net->proc_net); +out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +} + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + remove_proc_entry("ptype", net->proc_net); + remove_proc_entry("softnet_stat", net->proc_net); + remove_proc_entry("dev", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + remove_proc_entry("dev_mcast", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +int __init dev_proc_init(void) +{ + int ret = register_pernet_subsys(&dev_proc_ops); + if (!ret) + return register_pernet_subsys(&dev_mc_net_ops); + return ret; +} diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 28c5f5aa7ca..a5b89a6fec6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -126,6 +126,19 @@ static ssize_t show_broadcast(struct device *dev, return -EINVAL; } +static int change_carrier(struct net_device *net, unsigned long new_carrier) +{ + if (!netif_running(net)) + return -EINVAL; + return dev_change_carrier(net, (bool) new_carrier); +} + +static ssize_t store_carrier(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_carrier); +} + static ssize_t show_carrier(struct device *dev, struct device_attribute *attr, char *buf) { @@ -331,7 +344,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), __ATTR(address, S_IRUGO, show_address, NULL), __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), - __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier), __ATTR(speed, S_IRUGO, show_speed, NULL), __ATTR(duplex, S_IRUGO, show_duplex, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), @@ -989,68 +1002,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len; } -static DEFINE_MUTEX(xps_map_mutex); -#define xmap_dereference(P) \ - rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) - -static void xps_queue_release(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - struct xps_map *map; - unsigned long index; - int i, pos, nonempty = 0; - - index = get_netdev_queue_index(queue); - - mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); - - if (dev_maps) { - for_each_possible_cpu(i) { - map = xmap_dereference(dev_maps->cpu_map[i]); - if (!map) - continue; - - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - - if (pos < map->len) { - if (map->len > 1) - map->queues[pos] = - map->queues[--map->len]; - else { - RCU_INIT_POINTER(dev_maps->cpu_map[i], - NULL); - kfree_rcu(map, rcu); - map = NULL; - } - } - if (map) - nonempty = 1; - } - - if (!nonempty) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - } - mutex_unlock(&xps_map_mutex); -} - static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, const char *buf, size_t len) { struct net_device *dev = queue->dev; - cpumask_var_t mask; - int err, i, cpu, pos, map_len, alloc_len, need_set; unsigned long index; - struct xps_map *map, *new_map; - struct xps_dev_maps *dev_maps, *new_dev_maps; - int nonempty = 0; - int numa_node_id = -2; + cpumask_var_t mask; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1066,105 +1025,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err; } - new_dev_maps = kzalloc(max_t(unsigned int, - XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); - if (!new_dev_maps) { - free_cpumask_var(mask); - return -ENOMEM; - } - - mutex_lock(&xps_map_mutex); - - dev_maps = xmap_dereference(dev->xps_maps); - - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - new_map = map; - if (map) { - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - map_len = map->len; - alloc_len = map->alloc_len; - } else - pos = map_len = alloc_len = 0; - - need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); -#ifdef CONFIG_NUMA - if (need_set) { - if (numa_node_id == -2) - numa_node_id = cpu_to_node(cpu); - else if (numa_node_id != cpu_to_node(cpu)) - numa_node_id = -1; - } -#endif - if (need_set && pos >= map_len) { - /* Need to add queue to this CPU's map */ - if (map_len >= alloc_len) { - alloc_len = alloc_len ? - 2 * alloc_len : XPS_MIN_MAP_ALLOC; - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!new_map) - goto error; - new_map->alloc_len = alloc_len; - for (i = 0; i < map_len; i++) - new_map->queues[i] = map->queues[i]; - new_map->len = map_len; - } - new_map->queues[new_map->len++] = index; - } else if (!need_set && pos < map_len) { - /* Need to remove queue from this CPU's map */ - if (map_len > 1) - new_map->queues[pos] = - new_map->queues[--new_map->len]; - else - new_map = NULL; - } - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); - } - - /* Cleanup old maps */ - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) - kfree_rcu(map, rcu); - if (new_dev_maps->cpu_map[cpu]) - nonempty = 1; - } - - if (nonempty) { - rcu_assign_pointer(dev->xps_maps, new_dev_maps); - } else { - kfree(new_dev_maps); - RCU_INIT_POINTER(dev->xps_maps, NULL); - } - - if (dev_maps) - kfree_rcu(dev_maps, rcu); - - netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : - NUMA_NO_NODE); - - mutex_unlock(&xps_map_mutex); + err = netif_set_xps_queue(dev, mask, index); free_cpumask_var(mask); - return len; -error: - mutex_unlock(&xps_map_mutex); - - if (new_dev_maps) - for_each_possible_cpu(i) - kfree(rcu_dereference_protected( - new_dev_maps->cpu_map[i], - 1)); - kfree(new_dev_maps); - free_cpumask_var(mask); - return -ENOMEM; + return err ? : len; } static struct netdev_queue_attribute xps_cpus_attribute = @@ -1183,10 +1048,6 @@ static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); -#ifdef CONFIG_XPS - xps_queue_release(queue); -#endif - memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3151acf5ec1..fa32899006a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,9 @@ #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <trace/events/napi.h> @@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +static struct srcu_struct netpoll_srcu; + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -55,7 +60,8 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -181,13 +187,13 @@ static void poll_napi(struct net_device *dev) } } -static void service_arp_queue(struct netpoll_info *npi) +static void service_neigh_queue(struct netpoll_info *npi) { if (npi) { struct sk_buff *skb; - while ((skb = skb_dequeue(&npi->arp_tx))) - netpoll_arp_reply(skb, npi); + while ((skb = skb_dequeue(&npi->neigh_tx))) + netpoll_neigh_reply(skb, npi); } } @@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - if (!dev || !netif_running(dev)) + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (!mutex_trylock(&ni->dev_lock)) return; + if (!netif_running(dev)) { + mutex_unlock(&ni->dev_lock); + return; + } + ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) + if (!ops->ndo_poll_controller) { + mutex_unlock(&ni->dev_lock); return; + } /* Process pending work on NIC */ ops->ndo_poll_controller(dev); poll_napi(dev); + mutex_unlock(&ni->dev_lock); + if (dev->flags & IFF_SLAVE) { if (ni) { - struct net_device *bond_dev = dev->master; + struct net_device *bond_dev; struct sk_buff *skb; - struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->arp_tx))) { + struct netpoll_info *bond_ni; + + bond_dev = netdev_master_upper_dev_get_rcu(dev); + bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->neigh_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_ni->arp_tx, skb); + skb_queue_tail(&bond_ni->neigh_tx, skb); } } } - service_arp_queue(ni); + service_neigh_queue(ni); zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + mutex_lock(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} +EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + mutex_unlock(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -381,9 +428,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; + struct ipv6hdr *ip6h; udp_len = len + sizeof(*udph); - ip_len = udp_len + sizeof(*iph); + if (np->ipv6) + ip_len = udp_len + sizeof(*ip6h); + else + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, @@ -400,34 +452,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - udph->check = 0; - udph->check = csum_tcpudp_magic(np->local_ip, - np->remote_ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip, &(iph->saddr)); - put_unaligned(np->remote_ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); + + if (np->ipv6) { + udph->check = 0; + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + put_unaligned(0x60, (unsigned char *)ip6h); + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IPV6); + } else { + udph->check = 0; + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + put_unaligned(0x45, (unsigned char *)iph); + iph->tos = 0; + put_unaligned(htons(ip_len), &(iph->tot_len)); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &(iph->saddr)); + put_unaligned(np->remote_ip.ip, &(iph->daddr)); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IP); + } + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); @@ -437,18 +521,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct arphdr *arp; - unsigned char *arp_ptr; - int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; + int size, type = ARPOP_REPLY; __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np, *tmp; unsigned long flags; int hlen, tlen; - int hits = 0; + int hits = 0, proto; if (list_empty(&npinfo->rx_np)) return; @@ -466,94 +548,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) if (!hits) return; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; + proto = ntohs(eth_hdr(skb)->h_proto); + if (proto == ETH_P_IP) { + struct arphdr *arp; + unsigned char *arp_ptr; + /* No arp on this interface */ + if (skb->dev->flags & IFF_NOARP) + return; - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return; - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + arp = arp_hdr(skb); - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; + if ((arp->ar_hrd != htons(ARPHRD_ETHER) && + arp->ar_hrd != htons(ARPHRD_IEEE802)) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_op != htons(ARPOP_REQUEST)) + return; - size = arp_hdr_len(skb->dev); + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + /* If we actually cared about dst hw addr, + it would get copied here */ + arp_ptr += skb->dev->addr_len; + memcpy(&tip, arp_ptr, 4); - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip) - continue; + /* Should we ignore arp? */ + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + return; - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; + size = arp_hdr_len(skb->dev); - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip.ip) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); + + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ + + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } else if( proto == ETH_P_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct nd_msg *msg; + u8 *lladdr = NULL; + struct ipv6hdr *hdr; + struct icmp6hdr *icmp6h; + const struct in6_addr *saddr; + const struct in6_addr *daddr; + struct inet6_dev *in6_dev = NULL; + struct in6_addr *target; + + in6_dev = in6_dev_get(skb->dev); + if (!in6_dev || !in6_dev->cnf.accept_ra) + return; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ + if (!pskb_may_pull(skb, skb->len)) + return; - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + msg = (struct nd_msg *)skb_transport_header(skb); - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + __skb_push(skb, skb->data - skb_transport_header(skb)); - netpoll_send_skb(np, send_skb); + if (ipv6_hdr(skb)->hop_limit != 255) + return; + if (msg->icmph.icmp6_code != 0) + return; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ - break; + size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); + + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + send_skb->protocol = htons(ETH_P_IPV6); + send_skb->dev = skb->dev; + + skb_reset_network_header(send_skb); + skb_put(send_skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(send_skb); + + *(__be32*)hdr = htonl(0x60000000); + + hdr->payload_len = htons(size); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = 255; + hdr->saddr = *saddr; + hdr->daddr = *daddr; + + send_skb->transport_header = send_skb->tail; + skb_put(send_skb, size); + + icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + icmp6h->icmp6_router = 0; + icmp6h->icmp6_solicited = 1; + target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + *target = msg->target; + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + size, 0)); + + if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, + lladdr, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); +#endif } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); +} + +static bool pkt_is_ns(struct sk_buff *skb) +{ + struct nd_msg *msg; + struct ipv6hdr *hdr; + + if (skb->protocol != htons(ETH_P_ARP)) + return false; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) + return false; + + msg = (struct nd_msg *)skb_transport_header(skb); + __skb_push(skb, skb->data - skb_transport_header(skb)); + hdr = ipv6_hdr(skb); + + if (hdr->nexthdr != IPPROTO_ICMPV6) + return false; + if (hdr->hop_limit != 255) + return false; + if (msg->icmph.icmp6_code != 0) + return false; + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return false; + + return true; } int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) @@ -571,9 +773,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) goto out; /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && - atomic_read(&trapped)) { - skb_queue_tail(&npinfo->arp_tx, skb); + if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); + return 1; + } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { + skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -584,60 +788,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP) + if (proto != ETH_P_IP && proto != ETH_P_IPV6) goto out; if (skb->pkt_type == PACKET_OTHERHOST) goto out; if (skb_shared(skb)) goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; + if (proto == ETH_P_IP) { + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + iph = (struct iphdr *)skb->data; + if (iph->ihl < 5 || iph->version != 4) + goto out; + if (!pskb_may_pull(skb, iph->ihl*4)) + goto out; + iph = (struct iphdr *)skb->data; + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto out; - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; + len = ntohs(iph->tot_len); + if (skb->len < len || len < iph->ihl*4) + goto out; - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. + */ + if (pskb_trim_rcsum(skb, len)) + goto out; - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - ulen = ntohs(uh->len); + iph = (struct iphdr *)skb->data; + if (iph->protocol != IPPROTO_UDP) + goto out; - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; + len -= iph->ihl*4; + uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + ulen = ntohs(uh->len); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip && np->local_ip != iph->daddr) - continue; - if (np->remote_ip && np->remote_ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; + if (ulen != len) + goto out; + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip.ip && np->local_ip.ip != iph->daddr) + continue; + if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + } else { +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *ip6h; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); - hits++; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + ip6h = (struct ipv6hdr *)skb->data; + if (ip6h->version != 6) + goto out; + len = ntohs(ip6h->payload_len); + if (!len) + goto out; + if (len + sizeof(struct ipv6hdr) > skb->len) + goto out; + if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) + goto out; + ip6h = ipv6_hdr(skb); + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + uh = udp_hdr(skb); + ulen = ntohs(uh->len); + if (ulen != skb->len) + goto out; + if (udp6_csum_init(skb, uh, IPPROTO_UDP)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) + continue; + if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } +#endif } if (!hits) @@ -658,17 +902,44 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - np_info(np, "local IP %pI4\n", &np->local_ip); + if (np->ipv6) + np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); + else + np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - np_info(np, "remote IP %pI4\n", &np->remote_ip); + if (np->ipv6) + np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); + else + np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end; + + if (!strchr(str, ':') && + in4_pton(str, -1, (void *)addr, -1, &end) > 0) { + if (!*end) + return 0; + } + if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { +#if IS_ENABLED(CONFIG_IPV6) + if (!*end) + return 1; +#else + return -1; +#endif + } + return -1; +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; + int ipv6; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -684,7 +955,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); + if (ipv6 < 0) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim; } cur++; @@ -716,7 +991,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->remote_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; + else if (np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { @@ -744,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -764,7 +1046,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->arp_tx); + mutex_init(&npinfo->dev_lock); + skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -777,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto free_npinfo; } } else { - npinfo = ndev->npinfo; + npinfo = rtnl_dereference(ndev->npinfo); atomic_inc(&npinfo->refcnt); } @@ -808,14 +1091,19 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; - if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); + rtnl_lock(); + if (np->dev_name) { + struct net *net = current->nsproxy->net_ns; + ndev = __dev_get_by_name(net, np->dev_name); + } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); - return -ENODEV; + err = -ENODEV; + goto unlock; } + dev_hold(ndev); - if (ndev->master) { + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; @@ -826,15 +1114,14 @@ int netpoll_setup(struct netpoll *np) np_info(np, "device %s not up yet, forcing it\n", np->dev_name); - rtnl_lock(); err = dev_open(ndev); - rtnl_unlock(); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } + rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { @@ -854,39 +1141,70 @@ int netpoll_setup(struct netpoll *np) np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); msleep(4000); } + rtnl_lock(); } - if (!np->local_ip) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); + if (!np->local_ip.ip) { + if (!np->ipv6) { + in_dev = __in_dev_get_rtnl(ndev); + + if (!in_dev || !in_dev->ifa_list) { + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); + err = -EDESTADDRREQ; + goto put; + } + + np->local_ip.ip = in_dev->ifa_list->ifa_local; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); + } else { +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *idev; - if (!in_dev || !in_dev->ifa_list) { - rcu_read_unlock(); - np_err(np, "no IP address for %s, aborting\n", - np->dev_name); err = -EDESTADDRREQ; + idev = __in6_dev_get(ndev); + if (idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + continue; + np->local_ip.in6 = ifp->addr; + err = 0; + break; + } + read_unlock_bh(&idev->lock); + } + if (err) { + np_err(np, "no IPv6 address for %s, aborting\n", + np->dev_name); + goto put; + } else + np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); +#else + np_err(np, "IPv6 is not supported %s, aborting\n", + np->dev_name); + err = -EINVAL; goto put; +#endif } - - np->local_ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); - np_info(np, "local IP %pI4\n", &np->local_ip); } /* fill up the skb queue */ refill_skbs(); - rtnl_lock(); err = __netpoll_setup(np, ndev, GFP_KERNEL); - rtnl_unlock(); - if (err) goto put; + rtnl_unlock(); return 0; put: dev_put(ndev); +unlock: + rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); @@ -894,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); + init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); @@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np) struct netpoll_info *npinfo; unsigned long flags; - npinfo = np->dev->npinfo; + /* rtnl_dereference would be preferable here but + * rcu_cleanup_netpoll path can put us in here safely without + * holding the rtnl, so plain rcu_dereference it is + */ + npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5e67defe2cb..0777d0aa18c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx) /* allocate & copy */ new = kzalloc(new_sz, GFP_KERNEL); - if (!new) { - pr_warn("Unable to alloc new priomap!\n"); + if (!new) return -ENOMEM; - } if (old) memcpy(new->priomap, old->priomap, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e6e1cbe863f..6048fc1da1c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -164,6 +164,7 @@ #ifdef CONFIG_XFRM #include <net/xfrm.h> #endif +#include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <linux/bitops.h> @@ -212,7 +213,6 @@ #define PKTGEN_MAGIC 0xbe9be955 #define PG_PROC_DIR "pktgen" #define PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir; #define MAX_CFLOWS 65536 @@ -397,7 +397,15 @@ struct pktgen_hdr { __be32 tv_usec; }; -static bool pktgen_exiting __read_mostly; + +static int pg_net_id __read_mostly; + +struct pktgen_net { + struct net *net; + struct proc_dir_entry *proc_dir; + struct list_head pktgen_threads; + bool pktgen_exiting; +}; struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ @@ -414,6 +422,7 @@ struct pktgen_thread { wait_queue_head_t queue; struct completion start_done; + struct pktgen_net *net; }; #define REMOVE 1 @@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_reset_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); +static void pktgen_run_all_threads(struct pktgen_net *pn); +static void pktgen_reset_all_threads(struct pktgen_net *pn); +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); @@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly; static int debug __read_mostly; static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, @@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, { int err = 0; char data[128]; + struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); if (!capable(CAP_NET_ADMIN)) { err = -EPERM; @@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, data[count - 1] = 0; /* Make string */ if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); + pktgen_stop_all_threads_ifs(pn); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(pn); else if (!strcmp(data, "reset")) - pktgen_reset_all_threads(); + pktgen_reset_all_threads(pn); else pr_warning("Unknown command: %s\n", data); @@ -1827,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = { }; /* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) +static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, + const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; bool exact = (remove == FIND); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { @@ -1851,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) /* * mark a device for removal */ -static void pktgen_mark_device(const char *ifname) +static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname) { struct pktgen_dev *pkt_dev = NULL; const int max_tries = 10, msec_per_try = 125; @@ -1862,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname) while (1) { - pkt_dev = __pktgen_NN_threads(ifname, REMOVE); + pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE); if (pkt_dev == NULL) break; /* success */ @@ -1883,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname) mutex_unlock(&pktgen_thread_lock); } -static void pktgen_change_name(struct net_device *dev) +static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev) { struct pktgen_thread *t; - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); pkt_dev->entry = proc_create_data(dev->name, 0600, - pg_proc_dir, + pn->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) @@ -1912,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); - if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) + if (pn->pktgen_exiting) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -1922,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused, switch (event) { case NETDEV_CHANGENAME: - pktgen_change_name(dev); + pktgen_change_name(pn, dev); break; case NETDEV_UNREGISTER: - pktgen_mark_device(dev->name); + pktgen_mark_device(pn, dev->name); break; } return NOTIFY_DONE; } -static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, +static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { char b[IFNAMSIZ+5]; @@ -1947,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, } b[i] = 0; - return dev_get_by_name(&init_net, b); + return dev_get_by_name(pn->net, b); } /* Associate pktgen_dev with a device. */ -static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) +static int pktgen_setup_dev(const struct pktgen_net *pn, + struct pktgen_dev *pkt_dev, const char *ifname) { struct net_device *odev; int err; @@ -1964,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = pktgen_dev_get_by_name(pkt_dev, ifname); + odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname); if (!odev) { pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -2206,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; + struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, DUMMY_MARK, + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, @@ -2915,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t) t->control &= ~(T_STOP); } -static void pktgen_stop_all_threads_ifs(void) +static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2923,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= T_STOP; mutex_unlock(&pktgen_thread_lock); @@ -2959,28 +2973,28 @@ signal: return 0; } -static int pktgen_wait_all_threads_run(void) +static int pktgen_wait_all_threads_run(struct pktgen_net *pn) { struct pktgen_thread *t; int sig = 1; mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) { + list_for_each_entry(t, &pn->pktgen_threads, th_list) { sig = pktgen_wait_thread_run(t); if (sig == 0) break; } if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); return sig; } -static void pktgen_run_all_threads(void) +static void pktgen_run_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -2988,7 +3002,7 @@ static void pktgen_run_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_RUN); mutex_unlock(&pktgen_thread_lock); @@ -2996,10 +3010,10 @@ static void pktgen_run_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } -static void pktgen_reset_all_threads(void) +static void pktgen_reset_all_threads(struct pktgen_net *pn) { struct pktgen_thread *t; @@ -3007,7 +3021,7 @@ static void pktgen_reset_all_threads(void) mutex_lock(&pktgen_thread_lock); - list_for_each_entry(t, &pktgen_threads, th_list) + list_for_each_entry(t, &pn->pktgen_threads, th_list) t->control |= (T_REMDEVALL); mutex_unlock(&pktgen_thread_lock); @@ -3015,7 +3029,7 @@ static void pktgen_reset_all_threads(void) /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); - pktgen_wait_all_threads_run(); + pktgen_wait_all_threads_run(pn); } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -3157,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - - remove_proc_entry(t->tsk->comm, pg_proc_dir); - + remove_proc_entry(t->tsk->comm, t->net->proc_dir); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3305,7 +3317,7 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { - if (pktgen_exiting) + if (t->net->pktgen_exiting) break; wait_event_interruptible_timeout(t->queue, t->control != 0, @@ -3427,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) /* We don't allow a device to be on several threads */ - pkt_dev = __pktgen_NN_threads(ifname, FIND); + pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND); if (pkt_dev) { pr_err("ERROR: interface already used\n"); return -EBUSY; @@ -3462,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->node = -1; - err = pktgen_setup_dev(pkt_dev, ifname); + err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) goto out1; if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, + pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", @@ -3493,7 +3505,7 @@ out1: return err; } -static int __init pktgen_create_thread(int cpu) +static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) { struct pktgen_thread *t; struct proc_dir_entry *pe; @@ -3511,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); - list_add_tail(&t->th_list, &pktgen_threads); + list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); p = kthread_create_on_node(pktgen_thread_worker, @@ -3527,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu) kthread_bind(p, cpu); t->tsk = p; - pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, + pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, &pktgen_thread_fops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", @@ -3538,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu) return -EINVAL; } + t->net = pn; wake_up_process(p); wait_for_completion(&t->start_done); @@ -3563,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { + struct pktgen_net *pn = t->net; pr_debug("remove_device pkt_dev=%p\n", pkt_dev); @@ -3583,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, _rem_dev_from_if_list(t, pkt_dev); if (pkt_dev->entry) - remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); + remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3595,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t, return 0; } -static int __init pg_init(void) +static int __net_init pg_net_init(struct net *net) { - int cpu; + struct pktgen_net *pn = net_generic(net, pg_net_id); struct proc_dir_entry *pe; - int ret = 0; - - pr_info("%s", version); - - pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); - if (!pg_proc_dir) + int cpu, ret = 0; + + pn->net = net; + INIT_LIST_HEAD(&pn->pktgen_threads); + pn->pktgen_exiting = false; + pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net); + if (!pn->proc_dir) { + pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; - - pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); + } + pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); if (pe == NULL) { - pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); + pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; - goto remove_dir; + goto remove; } - register_netdevice_notifier(&pktgen_notifier_block); - for_each_online_cpu(cpu) { int err; - err = pktgen_create_thread(cpu); + err = pktgen_create_thread(cpu, pn); if (err) - pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } - if (list_empty(&pktgen_threads)) { - pr_err("ERROR: Initialization failed for all threads\n"); + if (list_empty(&pn->pktgen_threads)) { + pr_err("Initialization failed for all threads\n"); ret = -ENODEV; - goto unregister; + goto remove_entry; } return 0; - unregister: - unregister_netdevice_notifier(&pktgen_notifier_block); - remove_proc_entry(PGCTRL, pg_proc_dir); - remove_dir: - proc_net_remove(&init_net, PG_PROC_DIR); +remove_entry: + remove_proc_entry(PGCTRL, pn->proc_dir); +remove: + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); return ret; } -static void __exit pg_cleanup(void) +static void __net_exit pg_net_exit(struct net *net) { + struct pktgen_net *pn = net_generic(net, pg_net_id); struct pktgen_thread *t; struct list_head *q, *n; LIST_HEAD(list); /* Stop all interfaces & threads */ - pktgen_exiting = true; + pn->pktgen_exiting = true; mutex_lock(&pktgen_thread_lock); - list_splice_init(&pktgen_threads, &list); + list_splice_init(&pn->pktgen_threads, &list); mutex_unlock(&pktgen_thread_lock); list_for_each_safe(q, n, &list) { @@ -3661,12 +3675,36 @@ static void __exit pg_cleanup(void) kfree(t); } - /* Un-register us from receiving netdevice events */ - unregister_netdevice_notifier(&pktgen_notifier_block); + remove_proc_entry(PGCTRL, pn->proc_dir); + remove_proc_entry(PG_PROC_DIR, pn->net->proc_net); +} + +static struct pernet_operations pg_net_ops = { + .init = pg_net_init, + .exit = pg_net_exit, + .id = &pg_net_id, + .size = sizeof(struct pktgen_net), +}; + +static int __init pg_init(void) +{ + int ret = 0; - /* Clean up proc file system */ - remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(&init_net, PG_PROC_DIR); + pr_info("%s", version); + ret = register_pernet_subsys(&pg_net_ops); + if (ret) + return ret; + ret = register_netdevice_notifier(&pktgen_notifier_block); + if (ret) + unregister_pernet_subsys(&pg_net_ops); + + return ret; +} + +static void __exit pg_cleanup(void) +{ + unregister_netdevice_notifier(&pktgen_notifier_block); + unregister_pernet_subsys(&pg_net_ops); } module_init(pg_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1868625af25..d8aa20f6a46 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ @@ -879,6 +880,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_stats64 *stats; struct nlattr *attr, *af_spec; struct rtnl_af_ops *af_ops; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -907,8 +909,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #endif (dev->ifindex != dev->iflink && nla_put_u32(skb, IFLA_LINK, dev->iflink)) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (upper_dev && + nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && @@ -1108,6 +1111,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, [IFLA_MASTER] = { .type = NLA_U32 }, + [IFLA_CARRIER] = { .type = NLA_U8 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1270,16 +1274,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) static int do_set_master(struct net_device *dev, int ifindex) { - struct net_device *master_dev; + struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; int err; - if (dev->master) { - if (dev->master->ifindex == ifindex) + if (upper_dev) { + if (upper_dev->ifindex == ifindex) return 0; - ops = dev->master->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { - err = ops->ndo_del_slave(dev->master, dev); + err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; } else { @@ -1288,12 +1292,12 @@ static int do_set_master(struct net_device *dev, int ifindex) } if (ifindex) { - master_dev = __dev_get_by_index(dev_net(dev), ifindex); - if (!master_dev) + upper_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!upper_dev) return -EINVAL; - ops = master_dev->netdev_ops; + ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(master_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev); if (err) return err; } else { @@ -1307,7 +1311,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; - int send_addr_notify = 0; int err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { @@ -1360,16 +1363,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!ops->ndo_set_mac_address) { - err = -EOPNOTSUPP; - goto errout; - } - - if (!netif_device_present(dev)) { - err = -ENODEV; - goto errout; - } - len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { @@ -1379,13 +1372,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = ops->ndo_set_mac_address(dev, sa); + err = dev_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; - send_addr_notify = 1; modified = 1; - add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { @@ -1422,7 +1413,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); - send_addr_notify = 1; + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } if (ifm->ifi_flags || ifm->ifi_change) { @@ -1438,6 +1429,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_CARRIER]) { + err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -1536,9 +1534,6 @@ errout: net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", dev->name); - if (send_addr_notify) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - return err; } @@ -1672,9 +1667,11 @@ struct net_device *rtnl_create_link(struct net *net, if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) + if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); + dev->addr_assign_type = NET_ADDR_SET; + } if (tb[IFLA_BROADCAST]) memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), nla_len(tb[IFLA_BROADCAST])); @@ -1992,6 +1989,7 @@ errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); } +EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, @@ -2054,16 +2052,12 @@ errout: static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); - struct net_device *master = NULL; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; u8 *addr; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2096,10 +2090,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - master = dev->master; - err = master->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; + + err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags); if (err) goto out; else @@ -2125,7 +2119,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; @@ -2133,8 +2127,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2148,22 +2143,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && (dev->priv_flags & IFF_BRIDGE_PORT)) { - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + const struct net_device_ops *ops = br_dev->netdev_ops; - if (master->netdev_ops->ndo_fdb_del) - err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2173,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2247,9 +2247,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *master = dev->master; - const struct net_device_ops *ops = master->netdev_ops; + struct net_device *br_dev; + const struct net_device_ops *ops; + br_dev = netdev_master_upper_dev_get(dev); + ops = br_dev->netdev_ops; if (ops->ndo_fdb_dump) idx = ops->ndo_fdb_dump(skb, cb, dev, idx); } @@ -2270,6 +2272,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct ifinfomsg *ifm; struct nlattr *br_afspec; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); if (nlh == NULL) @@ -2287,8 +2290,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_OPERSTATE, operstate) || - (dev->master && - nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (br_dev && + nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev->iflink && @@ -2320,23 +2323,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { const struct net_device_ops *ops = dev->netdev_ops; - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); - if (master && master->netdev_ops->ndo_bridge_getlink) { + if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - master->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2365,7 +2376,7 @@ static inline size_t bridge_nlmsg_size(void) static int rtnl_bridge_notify(struct net_device *dev, u16 flags) { struct net *net = dev_net(dev); - struct net_device *master = dev->master; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; @@ -2376,15 +2387,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) } if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - master && master->netdev_ops->ndo_bridge_getlink) { - err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } @@ -2436,13 +2447,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, oflags = flags; if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { - if (!dev->master || - !dev->master->netdev_ops->ndo_bridge_setlink) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) { err = -EOPNOTSUPP; goto out; } - err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; @@ -2468,6 +2480,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2651,6 +2734,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 32443ebc3e8..33245ef54c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { .get = sock_pipe_buf_get, }; -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - /** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. + * skb_panic - private function for out-of-line support + * @skb: buffer + * @sz: size + * @addr: address + * @msg: skb_over_panic or skb_under_panic + * + * Out-of-line support for skb_put() and skb_push(). + * Called via the wrapper skb_over_panic() or skb_under_panic(). + * Keep out of line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always reliable. */ -static void skb_over_panic(struct sk_buff *skb, int sz, void *here) +static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, + const char msg[]) { pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, + msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -static void skb_under_panic(struct sk_buff *skb, int sz, void *here) +static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", - __func__, here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); - BUG(); + skb_panic(skb, sz, addr, __func__); } +static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) +{ + skb_panic(skb, sz, addr, __func__); +} /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells @@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) -void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, - bool *pfmemalloc) + +static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, + unsigned long ip, bool *pfmemalloc) { void *obj; bool ret_pfmemalloc = false; @@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -327,6 +319,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -348,10 +341,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; @@ -2337,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2668,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length) { - int frg_cnt = 0; - skb_frag_t *frag = NULL; - struct page *page = NULL; - int copy, left; + int frg_cnt = skb_shinfo(skb)->nr_frags; + int copy; int offset = 0; int ret; + struct page_frag *pfrag = ¤t->task_frag; do { /* Return error if we don't have space for new frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; if (frg_cnt >= MAX_SKB_FRAGS) - return -EFAULT; + return -EMSGSIZE; - /* allocate a new page for next frag */ - page = alloc_pages(sk->sk_allocation, 0); - - /* If alloc_page fails just return failure and caller will - * free previous allocated pages by doing kfree_skb() - */ - if (page == NULL) + if (!sk_page_frag_refill(sk, pfrag)) return -ENOMEM; - /* initialize the next frag */ - skb_fill_page_desc(skb, frg_cnt, page, 0, 0); - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); - - /* get the new initialized frag */ - frg_cnt = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; - /* copy the user data to page */ - left = PAGE_SIZE - frag->page_offset; - copy = (length > left)? left : length; + copy = min_t(int, length, pfrag->size - pfrag->offset); - ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, 0, skb); + ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, + offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - skb_frag_size_add(frag, copy); + skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, + copy); + frg_cnt++; + pfrag->offset += copy; + get_page(pfrag->page); + + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2759,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2835,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; @@ -2853,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); diff --git a/net/core/sock.c b/net/core/sock.c index bc131d41968..fe96c5d3429 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_REUSEADDR: sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; + case SO_REUSEPORT: + sk->sk_reuseport = valbool; + break; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -861,6 +864,13 @@ set_rcvbuf: ret = sk_detach_filter(sk); break; + case SO_LOCK_FILTER: + if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) + ret = -EPERM; + else + sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); + break; + case SO_PASSSEC: if (valbool) set_bit(SOCK_PASSSEC, &sock->flags); @@ -965,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reuse; break; + case SO_REUSEPORT: + v.val = sk->sk_reuseport; + break; + case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; @@ -1140,6 +1154,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; + case SO_LOCK_FILTER: + v.val = sock_flag(sk, SOCK_FILTER_LOCKED); + break; + default: return -ENOPROTOOPT; } @@ -2212,7 +2230,7 @@ EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); @@ -2818,7 +2836,7 @@ static const struct file_operations proto_seq_fops = { static __net_init int proto_init_net(struct net *net) { - if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops)) return -ENOMEM; return 0; @@ -2826,7 +2844,7 @@ static __net_init int proto_init_net(struct net *net) static __net_exit void proto_exit_net(struct net *net) { - proc_net_remove(net, "protocols"); + remove_proc_entry("protocols", net->proc_net); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d1b08045a9d..cfdb46ab3a7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,6 +20,8 @@ #include <net/sock.h> #include <net/net_ratelimit.h> +static int one = 1; + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "dev_weight", diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0a8d6ebd9b4..4c6bdf97a65 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -171,7 +171,7 @@ static __init int dccpprobe_init(void) spin_lock_init(&dccpw.lock); if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; ret = setup_jprobe(); @@ -181,7 +181,7 @@ static __init int dccpprobe_init(void) pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfifo_free(&dccpw.fifo); return ret; @@ -191,7 +191,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(&dccpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&dccp_send_probe); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 307c322d53b..c4a2def5b7b 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -909,6 +909,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; struct flowidn fld; + struct dst_entry *dst; if (sock->state == SS_CONNECTED) goto out; @@ -955,10 +956,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, fld.flowidn_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; - sk->sk_route_caps = sk->sk_dst_cache->dev->features; + dst = __sk_dst_get(sk); + sk->sk_route_caps = dst->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); + scp->segsize_loc = dst_metric_advmss(dst); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; @@ -2382,7 +2384,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); + proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2411,7 +2413,7 @@ static void __exit decnet_exit(void) dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove(&init_net, "decnet"); + remove_proc_entry("decnet", init_net.proc_net); proto_unregister(&dn_proto); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e47ba9fc4a0..c8da116d84a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1412,7 +1412,7 @@ void __init dn_dev_init(void) rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); - proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1433,7 +1433,7 @@ void __exit dn_dev_cleanup(void) } #endif /* CONFIG_SYSCTL */ - proc_net_remove(&init_net, "decnet_dev"); + remove_proc_entry("decnet_dev", init_net.proc_net); dn_dev_devices_off(); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 3aede1b459f..f8637f93d31 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -95,7 +95,7 @@ static u32 dn_neigh_hash(const void *pkey, struct neigh_table dn_neigh_table = { .family = PF_DECnet, - .entry_size = sizeof(struct dn_neigh), + .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), .key_len = sizeof(__le16), .hash = dn_neigh_hash, .constructor = dn_neigh_construct, @@ -590,11 +590,12 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_create("decnet_neigh", S_IRUGO, init_net.proc_net, + &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove(&init_net, "decnet_neigh"); + remove_proc_entry("decnet_neigh", init_net.proc_net); neigh_table_clear(&dn_neigh_table); } diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8a96047c7c9..1aaa51ebbda 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -598,7 +598,7 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, if (reason == 0) reason = le16_to_cpu(scp->discdata_out.opt_status); - dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, + dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b57419cc41a..5ac0e153ef8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1282,7 +1282,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; @@ -1901,7 +1901,8 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_create("decnet_cache", S_IRUGO, init_net.proc_net, + &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, @@ -1917,7 +1918,7 @@ void __exit dn_route_cleanup(void) del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove(&init_net, "decnet_cache"); + remove_proc_entry("decnet_cache", init_net.proc_net); dst_entries_destroy(&dn_dst_ops); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 45295ca0957..2bc62ea857c 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -80,6 +80,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, int ret; char *name; int i; + bool valid_name_found = false; /* * Probe for switch model. @@ -131,8 +132,13 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, } else { ds->phys_port_mask |= 1 << i; } + valid_name_found = true; } + if (!valid_name_found && i == DSA_MAX_PORTS) { + ret = -EINVAL; + goto out; + } /* * If the CPU connects to this switch, set the switch tree diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e32083d5d8f..6ebd8fbd928 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -41,8 +41,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->name = "dsa slave smi"; ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", - ds->master_mii_bus->id, ds->pd->sw_addr); + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", + ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; } @@ -203,10 +203,10 @@ dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { - strncpy(drvinfo->driver, "dsa", 32); - strncpy(drvinfo->version, dsa_driver_version, 32); - strncpy(drvinfo->fw_version, "N/A", 32); - strncpy(drvinfo->bus_info, "platform", 32); + strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, dsa_driver_version, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (p->phy != NULL) { phy_attach(slave_dev, dev_name(&p->phy->dev), - 0, PHY_INTERFACE_MODE_GMII); + PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; p->phy->speed = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 4efad533e5f..a36c85eab5b 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -272,6 +272,36 @@ void eth_header_cache_update(struct hh_cache *hh, EXPORT_SYMBOL(eth_header_cache_update); /** + * eth_prepare_mac_addr_change - prepare for mac change + * @dev: network device + * @p: socket address + */ +int eth_prepare_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + return 0; +} +EXPORT_SYMBOL(eth_prepare_mac_addr_change); + +/** + * eth_commit_mac_addr_change - commit mac change + * @dev: network device + * @p: socket address + */ +void eth_commit_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); +} +EXPORT_SYMBOL(eth_commit_mac_addr_change); + +/** * eth_mac_addr - set new Ethernet hardware address * @dev: network device * @p: socket address @@ -283,15 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update); */ int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr = p; + int ret; - if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) - return -EBUSY; - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); - /* if device marked as NET_ADDR_RANDOM, reset it */ - dev->addr_assign_type &= ~NET_ADDR_RANDOM; + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index f651da60f16..43b95ca6111 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -377,17 +377,14 @@ static int lowpan_header_create(struct sk_buff *skb, struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 *head; + u8 head[100]; struct ieee802154_addr sa, da; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ if (type != ETH_P_IPV6) return 0; - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - head = kzalloc(100, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; hdr = ipv6_hdr(skb); hc06_ptr = head + 2; @@ -561,8 +558,6 @@ static int lowpan_header_create(struct sk_buff *skb, skb_pull(skb, sizeof(struct ipv6hdr)); memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - kfree(head); - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); @@ -594,10 +589,32 @@ static int lowpan_header_create(struct sk_buff *skb, } } +static int lowpan_give_skb_to_devices(struct sk_buff *skb) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + stat = -ENOMEM; + break; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + } + rcu_read_unlock(); + + return stat; +} + static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) { struct sk_buff *new; - struct lowpan_dev_record *entry; int stat = NET_RX_SUCCESS; new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), @@ -614,19 +631,7 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) new->protocol = htons(ETH_P_IPV6); new->pkt_type = PACKET_HOST; - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { - skb = skb_copy(new, GFP_ATOMIC); - if (!skb) { - stat = -ENOMEM; - break; - } - - skb->dev = entry->ldev; - stat = netif_rx(skb); - } - rcu_read_unlock(); + stat = lowpan_give_skb_to_devices(new); kfree_skb(new); @@ -1137,19 +1142,42 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* check that it's our buffer */ - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - local_skb = skb_clone(skb, GFP_ATOMIC); + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Copy the packet so that the IPv6 header is + * properly aligned. + */ + local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, + skb_tailroom(skb), GFP_ATOMIC); if (!local_skb) goto drop; - lowpan_process_data(local_skb); + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(local_skb, 1); + skb_reset_network_header(local_skb); + skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); + + lowpan_give_skb_to_devices(local_skb); + + kfree_skb(local_skb); kfree_skb(skb); - break; - default: - break; + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + lowpan_process_data(local_skb); + + kfree_skb(skb); + break; + default: + break; + } } return NET_RX_SUCCESS; @@ -1234,7 +1262,7 @@ static inline int __init lowpan_netlink_init(void) return rtnl_link_register(&lowpan_link_ops); } -static inline void __init lowpan_netlink_fini(void) +static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24b384b7903..e225a4e5b57 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,21 +263,6 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, __u8 protocol) -{ - const struct net_protocol *ipprot; - - if (net_eq(net, &init_net)) - return 1; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot == NULL) { - /* raw IP is OK */ - return 1; - } - return ipprot->netns_ok; -} - /* * Create an inet socket. */ @@ -350,10 +335,6 @@ lookup_protocol: !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; - err = -EAFNOSUPPORT; - if (!inet_netns_ok(net, protocol)) - goto out_rcu_unlock; - sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -1306,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; @@ -1333,7 +1315,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); - if (!segs || IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out; skb = segs; @@ -1705,12 +1687,11 @@ static struct packet_type ip_packet_type __read_mostly = { static int __init inet_init(void) { - struct sk_buff *dummy_skb; struct inet_protosw *q; struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); if (!sysctl_local_reserved_ports) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b..2e7f1948216 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ded146b217f..fea4929f620 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1405,14 +1405,14 @@ static const struct file_operations arp_seq_fops = { static int __net_init arp_net_init(struct net *net) { - if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) + if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops)) return -ENOMEM; return 0; } static void __net_exit arp_net_exit(struct net *net) { - proc_net_remove(net, "arp"); + remove_proc_entry("arp", net->proc_net); } static struct pernet_operations arp_net_ops = { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a8e4f2665d5..5281314886c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -573,7 +582,107 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *node; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + rcu_read_lock(); + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + hlist_for_each_entry_rcu(ifa, node, + &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap ; + + rtnl_lock(); + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &ifa->ifa_next) { + if (*ifap == ifa) + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + } + rtnl_unlock(); + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + } + rcu_read_unlock(); + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + + set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + } + return 0; } /* @@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1988,6 +2191,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5cd75e2dab2..99f00d39d10 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -974,7 +974,7 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - portid = NETLINK_CB(skb).portid; /* pid of sending process */ + portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31d771ca9a7..61e03da3e1f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2607,31 +2607,31 @@ static const struct file_operations fib_route_fops = { int __net_init fib_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) + if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, - &fib_triestat_fops)) + if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, + &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) + if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove(net, "fib_triestat"); + remove_proc_entry("fib_triestat", net->proc_net); out2: - proc_net_remove(net, "fib_trie"); + remove_proc_entry("fib_trie", net->proc_net); out1: return -ENOMEM; } void __net_exit fib_proc_exit(struct net *net) { - proc_net_remove(net, "fib_trie"); - proc_net_remove(net, "fib_triestat"); - proc_net_remove(net, "route"); + remove_proc_entry("fib_trie", net->proc_net); + remove_proc_entry("fib_triestat", net->proc_net); + remove_proc_entry("route", net->proc_net); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c7..7a4c710c4cd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 736ab70fd17..d8c232794bc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2646,24 +2646,25 @@ static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) goto out_igmp; - pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + pde = proc_create("mcfilter", S_IRUGO, net->proc_net, + &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; return 0; out_mcfilter: - proc_net_remove(net, "igmp"); + remove_proc_entry("igmp", net->proc_net); out_igmp: return -ENOMEM; } static void __net_exit igmp_net_exit(struct net *net) { - proc_net_remove(net, "mcfilter"); - proc_net_remove(net, "igmp"); + remove_proc_entry("mcfilter", net->proc_net); + remove_proc_entry("igmp", net->proc_net); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f00d52..11cb4979a46 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk, struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* * Unlike other sk lookup places we do not check @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2))))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) int ret, attempts = 5; struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; + kuid_t uid = sock_i_uid(sk); local_bh_disable(); if (!snum) { @@ -125,9 +131,12 @@ again: spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + uid_eq(tb->fastuid, uid))) && (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; @@ -185,14 +194,18 @@ tb_found: if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) { goto success; } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -212,9 +225,19 @@ tb_not_found: tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; + if (sk->sk_reuseport) { + tb->fastreuseport = 1; + tb->fastuid = uid; + } else + tb->fastreuseport = 0; + } else { + if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; + if (tb->fastreuseport && + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + tb->fastreuseport = 0; + } success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4750d2b74d7..2e453bde699 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -73,8 +73,9 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { nf->nqueues = 0; - atomic_set(&nf->mem, 0); + init_frag_mem_limit(nf); INIT_LIST_HEAD(&nf->lru_list); + spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); @@ -91,6 +92,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) local_bh_disable(); inet_frag_evictor(nf, f, true); local_bh_enable(); + + percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -98,9 +101,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - list_del(&fq->lru_list); fq->net->nqueues--; write_unlock(&f->lock); + inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -117,12 +120,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb, int *work) + struct sk_buff *skb) { - if (work) - *work -= skb->truesize; - - atomic_sub(skb->truesize, &nf->mem); if (f->skb_free) f->skb_free(skb); kfree_skb(skb); @@ -133,6 +132,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, { struct sk_buff *fp; struct netns_frags *nf; + unsigned int sum, sum_truesize = 0; WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -143,13 +143,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(nf, f, fp, work); + sum_truesize += fp->truesize; + frag_kfree_skb(nf, f, fp); fp = xp; } - + sum = sum_truesize + f->qsize; if (work) - *work -= f->qsize; - atomic_sub(f->qsize, &nf->mem); + *work -= sum; + sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); @@ -164,22 +165,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) int work, evicted = 0; if (!force) { - if (atomic_read(&nf->mem) <= nf->high_thresh) + if (frag_mem_limit(nf) <= nf->high_thresh) return 0; } - work = atomic_read(&nf->mem) - nf->low_thresh; + work = frag_mem_limit(nf) - nf->low_thresh; while (work > 0) { - read_lock(&f->lock); + spin_lock(&nf->lru_lock); + if (list_empty(&nf->lru_list)) { - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); break; } q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); - read_unlock(&f->lock); + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); if (!(q->last_in & INET_FRAG_COMPLETE)) @@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - list_add_tail(&qp->lru_list, &nf->lru_list); nf->nqueues++; write_unlock(&f->lock); + inet_frag_lru_add(nf, qp); return qp; } @@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - atomic_add(f->qsize, &nf->mem); + add_frag_mem_limit(q, f->qsize); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae814871..0ce0595d986 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->fastreuseport = 0; tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); @@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 1 : 0; + score = sk->sk_family == PF_INET ? 2 : 1; if (rcv_saddr) { if (rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net, struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { result = sk; hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* @@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, inet_bind_bucket_for_each(tb, node, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { - if (tb->fastreuse >= 0) + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, @@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, break; } tb->fastreuse = -1; + tb->fastreuseport = -1; goto ok; next_port: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb9d63a570c..b6d30acb600 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net) int ip_frag_mem(struct net *net) { - return atomic_read(&net->ipv4.frags.mem); + return sum_frag_mem_limit(&net->ipv4.frags); } static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, @@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) qp->user == arg->user; } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); @@ -340,6 +333,7 @@ static inline int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; + unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { atomic_inc(&qp->q.refcnt); @@ -349,9 +343,12 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp); + + sum_truesize += fp->truesize; + kfree_skb(fp); fp = xp; } while (fp); + sub_frag_mem_limit(&qp->q, sum_truesize); qp->q.last_in = 0; qp->q.len = 0; @@ -496,7 +493,8 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it); + sub_frag_mem_limit(&qp->q, free_it->truesize); + kfree_skb(free_it); } } @@ -519,7 +517,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - atomic_add(skb->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) qp->q.last_in |= INET_FRAG_FIRST_IN; @@ -531,9 +529,7 @@ found: qp->q.meat == qp->q.len) return ip_frag_reasm(qp, prev, dev); - write_lock(&ip4_frags.lock); - list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); - write_unlock(&ip4_frags.lock); + inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: @@ -594,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split @@ -617,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &qp->q.net->mem); + add_frag_mem_limit(&qp->q, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -645,7 +641,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &qp->q.net->mem); + sub_frag_mem_limit(&qp->q, sum_truesize); head->next = NULL; head->dev = dev; @@ -851,14 +847,22 @@ static inline void ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - /* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to - * measurably harm machine performance. + /* Fragment cache limits. + * + * The fragment memory accounting code, (tries to) account for + * the real memory usage, by measuring both the size of frag + * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue)) + * and the SKB's truesize. + * + * A 64K fragment consumes 129736 bytes (44*2944)+200 + * (1500 truesize == 2944, sizeof(struct ipq) == 200) + * + * We will commit 4MB at one time. Should we cross that limit + * we will prune down to 3MB, making room for approx 8 big 64K + * fragments 8x128k. */ - net->ipv4.frags.high_thresh = 256 * 1024; - net->ipv4.frags.low_thresh = 192 * 1024; + net->ipv4.frags.high_thresh = 4 * 1024 * 1024; + net->ipv4.frags.low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e81b1caf2ea..5ef4da780ac 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,10 +735,32 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } + if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; const struct iphdr *tiph; struct flowi4 fl4; u8 tos; @@ -751,10 +773,21 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + old_iph = ip_hdr(skb); if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -818,8 +851,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ttl = tiph->ttl; tos = tiph->tos; - if (tos == 1) { - tos = 0; + if (tos & 0x1) { + tos &= ~0x1; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; else if (skb->protocol == htons(ETH_P_IPV6)) @@ -853,7 +886,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -873,7 +907,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -934,6 +970,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; + iph->id = 0; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -962,9 +999,17 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { int offset = skb_transport_offset(skb); + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) + goto tx_error; + } + *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, skb->len - offset, @@ -972,7 +1017,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1042,6 +1099,17 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. */ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + /* device supports enc gso offload*/ + if (tdev->hw_enc_features & NETIF_F_GRE_GSO) { + dev->features |= NETIF_F_TSO; + dev->hw_features |= NETIF_F_TSO; + } else { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + } return mtu; } @@ -1591,6 +1659,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f1395a6fb35..87abd3e2bd3 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb) if (ipprot != NULL) { int ret; - if (!net_eq(net, &init_net) && !ipprot->netns_ok) { - net_info_ratelimited("%s: proto %d isn't netns-ready\n", - __func__, protocol); - kfree_skb(skb); - goto out; - } - if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2bff5..5e12dca7b3d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -598,6 +598,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 9a46daed2f3..f01d1b1aff7 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -163,6 +163,7 @@ static const struct net_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, .err_handler = ipcomp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ipcomp4_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index a2e50ae80b5..98cbc687701 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1394,7 +1394,7 @@ static int __init ip_auto_config(void) unsigned int i; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); + proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24a745..8f024d41eef 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (tos & 1) tos = old_iph->tos; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cbd953..5f95b3aa579 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY)); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == htonl(INADDR_ANY) && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY)); + struct mfc_cache *c, *proxy; + + if (mcastgrp == htonl(INADDR_ANY)) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,34 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == htonl(INADDR_ANY) && + cache->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != htonl(INADDR_ANY) || + ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1894,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1984,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2196,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; @@ -2609,16 +2703,16 @@ static int __net_init ipmr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) + if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) goto proc_cache_fail; #endif return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: ipmr_rules_exit(net); #endif @@ -2629,8 +2723,8 @@ fail: static void __net_exit ipmr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip_mr_cache"); - proc_net_remove(net, "ip_mr_vif"); + remove_proc_entry("ip_mr_cache", net->proc_net); + remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_rules_exit(net); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3ea4127404d..7dc6a974359 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -901,7 +901,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -958,7 +958,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, } t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -1001,7 +1001,7 @@ static int __do_replace(struct net *net, const char *name, t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1158,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, NFPROTO_ARP, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1646,7 +1646,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 17c5e06da66..3efcf87400c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1090,7 +1090,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1149,7 +1149,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1189,7 +1189,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1347,7 +1347,7 @@ do_add_counters(struct net *net, const void __user *user, } t = xt_find_table_lock(net, AF_INET, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1931,7 +1931,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5ef3cba225..7d168dcbd13 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -88,10 +88,8 @@ static void ulog_send(unsigned int nlgroupnum) { ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; - if (timer_pending(&ub->timer)) { - pr_debug("ulog_send: timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("ulog_send: timer is deleting\n"); + del_timer(&ub->timer); if (!ub->skb) { pr_debug("ulog_send: nothing to send\n"); @@ -426,10 +424,8 @@ static void __exit ulog_tg_exit(void) /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) { - pr_debug("timer was pending, deleting\n"); - del_timer(&ub->timer); - } + pr_debug("timer is deleting\n"); + del_timer(&ub->timer); if (ub->skb) { kfree_skb(ub->skb); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fcdd0c2406e..2820aa18b54 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -100,7 +100,6 @@ static unsigned int ipv4_helper(unsigned int hooknum, enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; - unsigned int ret; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -116,13 +115,8 @@ static unsigned int ipv4_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } static unsigned int ipv4_confirm(unsigned int hooknum, @@ -420,54 +414,43 @@ static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); goto out_tcp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); + pr_err("nf_conntrack_udp4: pernet registration failed\n"); goto out_udp; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmp); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); if (ret < 0) { - pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); goto out_icmp; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv4); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { - pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); goto out_ipv4; } return 0; out_ipv4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); out_icmp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); out_udp: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp4); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); } static struct pernet_operations ipv4_net_ops = { @@ -500,16 +483,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) pr_err("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_pernet; } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; + } + #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) ret = nf_conntrack_ipv4_compat_init(); if (ret < 0) - goto cleanup_hooks; + goto cleanup_proto; #endif return ret; #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); +#endif + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); -#endif cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -523,6 +539,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) nf_conntrack_ipv4_compat_fini(); #endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 9682b36df38..f2ca1279408 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -417,12 +417,12 @@ static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, - &ip_exp_file_ops); + proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, + &ip_exp_file_ops); if (!proc_exp) goto err2; @@ -433,9 +433,9 @@ static int __net_init ip_conntrack_net_init(struct net *net) return 0; err3: - proc_net_remove(net, "ip_conntrack_expect"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); err2: - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack", net->proc_net); err1: return -ENOMEM; } @@ -443,8 +443,8 @@ err1: static void __net_exit ip_conntrack_net_exit(struct net *net) { remove_proc_entry("ip_conntrack", net->proc_net_stat); - proc_net_remove(net, "ip_conntrack_expect"); - proc_net_remove(net, "ip_conntrack"); + remove_proc_entry("ip_conntrack_expect", net->proc_net); + remove_proc_entry("ip_conntrack", net->proc_net); } static struct pernet_operations ip_conntrack_net_ops = { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6f9c07268cf..55c4ee1bba0 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -889,7 +889,7 @@ static int ping_proc_register(struct net *net) struct proc_dir_entry *p; int rc = 0; - p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops); + p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); if (!p) rc = -ENOMEM; return rc; @@ -897,7 +897,7 @@ static int ping_proc_register(struct net *net) static void ping_proc_unregister(struct net *net) { - proc_net_remove(net, "icmp"); + remove_proc_entry("icmp", net->proc_net); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8de53e1ddd5..32030a24e77 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -471,28 +471,29 @@ static const struct file_operations netstat_seq_fops = { static __net_init int ip_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_create("sockstat", S_IRUGO, net->proc_net, + &sockstat_seq_fops)) goto out_sockstat; - if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops)) goto out_snmp; return 0; out_snmp: - proc_net_remove(net, "netstat"); + remove_proc_entry("netstat", net->proc_net); out_netstat: - proc_net_remove(net, "sockstat"); + remove_proc_entry("sockstat", net->proc_net); out_sockstat: return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { - proc_net_remove(net, "snmp"); - proc_net_remove(net, "netstat"); - proc_net_remove(net, "sockstat"); + remove_proc_entry("snmp", net->proc_net); + remove_proc_entry("netstat", net->proc_net); + remove_proc_entry("sockstat", net->proc_net); } static __net_initdata struct pernet_operations ip_proc_ops = { diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0f9d09f54bd..ce848461acb 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -37,6 +37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { + if (!prot->netns_ok) { + pr_err("Protocol %u is not namespace aware, cannot register.\n", + protocol); + return -EINVAL; + } + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6f08991409c..53ddebc292b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1050,7 +1050,7 @@ static const struct file_operations raw_seq_fops = { static __net_init int raw_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) + if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops)) return -ENOMEM; return 0; @@ -1058,7 +1058,7 @@ static __net_init int raw_init_net(struct net *net) static __net_exit void raw_exit_net(struct net *net) { - proc_net_remove(net, "raw"); + remove_proc_entry("raw", net->proc_net); } static __net_initdata struct pernet_operations raw_net_ops = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0fcc47fee7..6e2851464f8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -117,15 +117,11 @@ #define RT_GC_TIMEOUT (300*HZ) static int ip_rt_max_size; -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_rt_gc_interval __read_mostly = 60 * HZ; -static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; @@ -384,8 +380,8 @@ static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "rt_cache", S_IRUGO, - &rt_cache_seq_fops); + pde = proc_create("rt_cache", S_IRUGO, net->proc_net, + &rt_cache_seq_fops); if (!pde) goto err1; @@ -2423,6 +2419,11 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; +static int ip_rt_gc_min_interval __read_mostly = HZ / 2; +static int ip_rt_gc_elasticity __read_mostly = 8; + static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b236ef04914..ef54377fb11 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -232,7 +232,8 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, * * return false if we decode an option that should not be. */ -bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) +bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, + struct net *net, bool *ecn_ok) { /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr & TSMASK; @@ -247,7 +248,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0; *ecn_ok = (options >> 5) & 1; - if (*ecn_ok && !sysctl_tcp_ecn) + if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn) return false; if (tcp_opt->sack_ok && !sysctl_tcp_sack) @@ -295,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d84400b6504..960fd29d9b8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -27,6 +27,7 @@ #include <net/tcp_memcontrol.h> static int zero; +static int one = 1; static int two = 2; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -232,8 +233,8 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) { ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; @@ -538,13 +539,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_ecn", - .data = &sysctl_tcp_ecn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_dsack", .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), @@ -556,14 +550,16 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -637,13 +633,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -786,7 +775,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -794,7 +783,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &one }, { } }; @@ -850,6 +839,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = ipv4_ping_group_range, }, { + .procname = "tcp_ecn", + .data = &init_net.ipv4.sysctl_tcp_ecn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "tcp_mem", .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), .mode = 0644, @@ -882,6 +878,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratemask; table[6].data = &net->ipv4.sysctl_ping_group_range; + table[7].data = + &net->ipv4.sysctl_tcp_ecn; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2aa69c8ae60..7a5ba48c2cc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) tcp_enable_early_retrans(tp); icsk->icsk_ca_ops = &tcp_init_congestion_ops; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; sk->sk_write_space = sk_stream_write_space; @@ -895,6 +897,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -2287,7 +2290,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); @@ -2711,6 +2713,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2959,6 +2967,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } @@ -3032,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -3243,7 +3255,7 @@ __tcp_alloc_md5sig_pool(struct sock *sk) struct crypto_hash *hash; hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (!hash || IS_ERR(hash)) + if (IS_ERR_OR_NULL(hash)) goto out_free; per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index cdf2e707bb1..019c2389a34 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -317,28 +317,11 @@ void tcp_slow_start(struct tcp_sock *tp) snd_cwnd = 1U; } - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; - if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else cnt = snd_cwnd; /* exponential increase */ - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; - tp->bytes_acked = 0; - tp->snd_cwnd_cnt += cnt; while (tp->snd_cwnd_cnt >= snd_cwnd) { tp->snd_cwnd_cnt -= snd_cwnd; @@ -378,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. */ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ad70a962c20..a759e19496d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -81,8 +81,6 @@ int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; EXPORT_SYMBOL(sysctl_tcp_reordering); -int sysctl_tcp_ecn __read_mostly = 2; -EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -100,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ @@ -2009,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; tp->frto_counter = 0; - tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -2058,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->bytes_acked = 0; tcp_clear_retrans_partial(tp); if (tcp_is_reno(tp)) @@ -2686,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; - tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2737,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; - tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tcp_init_cwnd_reduction(sk, set_ssthresh); @@ -3419,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -3615,15 +3607,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); @@ -3877,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; return true; } return false; @@ -3901,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; + return true; } @@ -5682,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + if (tp->rx_opt.saw_tstamp) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eadb693eef5..145d3bf8df8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, ip_hdr(skb)->daddr, + &tcp_hashinfo, ip_hdr(skb)->saddr, + th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) @@ -725,7 +726,7 @@ release_sk1: */ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts, int oif, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) { @@ -746,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - if (ts) { + if (tsecr) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); + rep.opt[1] = htonl(tsval); + rep.opt[2] = htonl(tsecr); arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } @@ -766,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, #ifdef CONFIG_TCP_MD5SIG if (key) { - int offset = (ts) ? 3 : 0; + int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -801,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -820,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -1570,7 +1573,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); if (want_cookie) { isn = cookie_v4_init_sequence(sk, skb, &req->mss); @@ -2077,6 +2080,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { @@ -2612,7 +2616,7 @@ EXPORT_SYMBOL(tcp_proc_register); void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(tcp_proc_unregister); @@ -2891,6 +2895,7 @@ EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) { + net->ipv4.sysctl_tcp_ecn = 2; return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f35f2dfb640..b83a49cc381 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { + tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); @@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tcptw->tw_ts_offset = tp->tsoffset; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -446,7 +448,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; @@ -500,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + newtp->tsoffset = 0; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d451593ef1..fd0cea114b5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -314,7 +314,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = 0; - if (sysctl_tcp_ecn == 1) { + if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } @@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when : 0; + opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 4526fe68e60..d4943f67aff 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -234,7 +234,7 @@ static __init int tcpprobe_init(void) if (!tcp_probe.log) goto err0; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_jprobe); @@ -244,7 +244,7 @@ static __init int tcpprobe_init(void) pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfree(tcp_probe.log); return ret; @@ -253,7 +253,7 @@ module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&tcp_jprobe); kfree(tcp_probe.log); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f4d405eafb..265c42cf963 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && @@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { if (bitmap) __set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); @@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { res = 1; break; @@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_rcv_saddr) { if (inet->inet_rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, /* * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) */ -#define SCORE2_MAX (1 + 2 + 2 + 2) static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) @@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (inet->inet_num != hnum) return -1; - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; - badness = -1; + badness = 0; udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) - goto exact_match; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -431,9 +446,7 @@ begin: */ if (get_nulls_value(node) != slot2) goto begin; - if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, } begin: result = NULL; - badness = -1; + badness = 0; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -971,7 +996,7 @@ back_from_confirm: sizeof(struct udphdr), &ipc, &rt, msg->msg_flags); err = PTR_ERR(skb); - if (skb && !IS_ERR(skb)) + if (!IS_ERR_OR_NULL(skb)) err = udp_send_skb(skb, fl4); goto out; } @@ -2097,7 +2122,7 @@ EXPORT_SYMBOL(udp_proc_register); void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { - proc_net_remove(net, afinfo->name); + remove_proc_entry(afinfo->name, net->proc_net); } EXPORT_SYMBOL(udp_proc_unregister); @@ -2280,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216d..1f12c8b4586 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... */ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2..fe5189e2e11 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3be0ac2c192..9a459be24af 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -262,21 +262,56 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; -#endif - -static void __init xfrm4_policy_init(void) +static int __net_init xfrm4_net_init(struct net *net) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm4_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv4", table); + if (!hdr) + goto err_reg; + + net->ipv4.xfrm4_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; } -static void __exit xfrm4_policy_fini(void) +static void __net_exit xfrm4_net_exit(struct net *net) { -#ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + struct ctl_table *table; + + if (net->ipv4.xfrm4_hdr == NULL) + return; + + table = net->ipv4.xfrm4_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations __net_initdata xfrm4_net_ops = { + .init = xfrm4_net_init, + .exit = xfrm4_net_exit, +}; #endif - xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); + +static void __init xfrm4_policy_init(void) +{ + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); } void __init xfrm4_init(void) @@ -286,8 +321,7 @@ void __init xfrm4_init(void) xfrm4_state_init(); xfrm4_policy_init(); #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", - xfrm4_policy_table); + register_pernet_subsys(&xfrm4_net_ops); #endif } diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 4ea244891b5..309af19a0a0 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b5d8cb9b12..4dc0d44a5d3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -248,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -432,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -615,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { @@ -1051,7 +1058,7 @@ retry: ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, addr_flags) : NULL; - if (!ift || IS_ERR(ift)) { + if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -2080,7 +2087,7 @@ ok: addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); - if (!ifp || IS_ERR(ifp)) { + if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } @@ -3318,14 +3325,14 @@ static const struct file_operations if6_fops = { static int __net_init if6_proc_net_init(struct net *net) { - if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops)) return -ENOMEM; return 0; } static void __net_exit if6_proc_net_exit(struct net *net) { - proc_net_remove(net, "if_inet6"); + remove_proc_entry("if_inet6", net->proc_net); } static struct pernet_operations if6_proc_net_ops = { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b043c60429b..6b793bfc0e1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -811,11 +811,10 @@ static struct pernet_operations inet6_net_ops = { static int __init inet6_init(void) { - struct sk_buff *dummy_skb; struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac..bb02e176cb7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 757a810d8f1..5a80f15a9de 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,7 +47,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); /* Big ac list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_ac_lock); +static DEFINE_SPINLOCK(ipv6_sk_ac_lock); /* @@ -128,10 +128,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_ac_inc(dev, addr); if (!err) { - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } @@ -152,7 +152,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -161,7 +161,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) @@ -169,7 +169,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); @@ -192,10 +192,10 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - write_lock_bh(&ipv6_sk_ac_lock); + spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - write_unlock_bh(&ipv6_sk_ac_lock); + spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); @@ -509,7 +509,7 @@ static const struct file_operations ac6_seq_fops = { int __net_init ac6_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -517,7 +517,7 @@ int __net_init ac6_proc_init(struct net *net) void ac6_proc_exit(struct net *net) { - proc_net_remove(net, "anycast6"); + remove_proc_entry("anycast6", net->proc_net); } #endif diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7a778b9a7b8..f5a54782a34 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -30,6 +30,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/tcp_states.h> +#include <net/dsfield.h> #include <linux/errqueue.h> #include <asm/uaccess.h> @@ -356,12 +357,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = serr->port; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - sin->sin6_addr = - *(struct in6_addr *)(nh + serr->addr_offset); + const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), + struct ipv6hdr, daddr); + sin->sin6_addr = ip6h->daddr; if (np->sndflow) - sin->sin6_flowinfo = - (*(__be32 *)(nh + serr->addr_offset - 24) & - IPV6_FLOWINFO_MASK); + sin->sin6_flowinfo = ip6_flowinfo(ip6h); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { @@ -489,13 +489,14 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxtclass) { - int tclass = ipv6_tclass(ipv6_hdr(skb)); + int tclass = ipv6_get_dsfield(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } - if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) { - __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK; - put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh); + if (flowinfo) + put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } /* HbH is allowed only once */ diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f2..07a7d65a7cb 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 30647857a37..b386a2ce4c6 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -32,6 +32,9 @@ int inet6_csk_bind_conflict(const struct sock *sk, { const struct sock *sk2; const struct hlist_node *node; + int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* @@ -42,11 +45,17 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, + sock_i_uid((struct sock *)sk2))))) { + if (ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + } } return node != NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd28e5..32b4a1675d8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_nulls_node *node; struct sock *result; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { hiscore = score; result = sk; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c new file mode 100644 index 00000000000..72d198b8e4d --- /dev/null +++ b/net/ipv6/ip6_checksum.c @@ -0,0 +1,97 @@ +#include <net/ip.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <asm/checksum.h> + +#ifndef _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum csum) +{ + + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->s6_addr32[0]; + carry = (sum < (__force u32)saddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[1]; + carry = (sum < (__force u32)saddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[2]; + carry = (sum < (__force u32)saddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->s6_addr32[3]; + carry = (sum < (__force u32)saddr->s6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[0]; + carry = (sum < (__force u32)daddr->s6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[1]; + carry = (sum < (__force u32)daddr->s6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[2]; + carry = (sum < (__force u32)daddr->s6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->s6_addr32[3]; + carry = (sum < (__force u32)daddr->s6_addr32[3]); + sum += carry; + + ulen = (__force u32)htonl((__u32) len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif + +int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->len, proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!skb_csum_unnecessary(skb)) + skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); + + return 0; +} +EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d6de4b44725..b973ed3d06c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -51,25 +51,38 @@ #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); -static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; +static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); /* FL hash table lock: it protects only of GC */ -static DEFINE_RWLOCK(ip6_fl_lock); +static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); +#define for_each_fl_rcu(hash, fl) \ + for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) +#define for_each_fl_continue_rcu(fl) \ + for (fl = rcu_dereference_bh(fl->next); \ + fl != NULL; \ + fl = rcu_dereference_bh(fl->next)) + +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } @@ -80,11 +93,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); fl = __fl_lookup(net, label); - if (fl) - atomic_inc(&fl->users); - read_unlock_bh(&ip6_fl_lock); + if (fl && !atomic_inc_not_zero(&fl->users)) + fl = NULL; + rcu_read_unlock_bh(); return fl; } @@ -96,13 +109,13 @@ static void fl_free(struct ip6_flowlabel *fl) put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); + kfree_rcu(fl, rcu); } - kfree(fl); } static void fl_release(struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { @@ -119,7 +132,7 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(unsigned long dummy) @@ -128,12 +141,13 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl=*flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) @@ -156,18 +170,19 @@ static void ip6_fl_gc(unsigned long dummy) if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static void __net_exit ip6_fl_purge(struct net *net) { int i; - write_lock(&ip6_fl_lock); + spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; - while ((fl = *flp) != NULL) { + while ((fl = rcu_dereference_protected(*flp, + lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; @@ -178,7 +193,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - write_unlock(&ip6_fl_lock); + spin_unlock(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, @@ -188,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; - write_lock_bh(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; @@ -210,16 +225,16 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; - fl_ht[FL_HASH(fl->label)] = fl; + rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); - write_unlock_bh(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); return NULL; } @@ -234,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -255,11 +270,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - while ((sfl = np->ipv6_fl_list) != NULL) { + if (!rcu_access_pointer(np->ipv6_fl_list)) + return; + + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -424,7 +449,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -467,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -493,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -541,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -553,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); @@ -641,13 +668,13 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - fl = fl_ht[state->bucket]; - - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - if (fl) - break; + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } } + fl = NULL; +out: return fl; } @@ -656,18 +683,22 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); - fl = fl->next; + for_each_fl_continue_rcu(fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + try_again: - while (fl && !net_eq(fl->fl_net, net)) - fl = fl->next; - - while (!fl) { - if (++state->bucket <= FL_HASH_MASK) { - fl = fl_ht[state->bucket]; - goto try_again; - } else - break; + if (++state->bucket <= FL_HASH_MASK) { + for_each_fl_rcu(state->bucket, fl) { + if (net_eq(fl->fl_net, net)) + goto out; + } + goto try_again; } + fl = NULL; + +out: return fl; } @@ -681,9 +712,9 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(ip6_fl_lock) + __acquires(RCU) { - read_lock_bh(&ip6_fl_lock); + rcu_read_lock_bh(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -700,9 +731,9 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) - __releases(ip6_fl_lock) + __releases(RCU) { - read_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) @@ -775,15 +806,15 @@ static const struct file_operations ip6fl_seq_fops = { static int __net_init ip6_flowlabel_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "ip6_flowlabel", - S_IRUGO, &ip6fl_seq_fops)) + if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net, + &ip6fl_seq_fops)) return -ENOMEM; return 0; } static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { - proc_net_remove(net, "ip6_flowlabel"); + remove_proc_entry("ip6_flowlabel", net->proc_net); } #else static inline int ip6_flowlabel_proc_init(struct net *net) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 131dd097736..e4efffe2522 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -772,9 +772,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1240,7 +1238,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a52d864d562..5b10414e619 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 2.7 + * Nodes must not originate a packet to a multicast address whose scope + * field contains the reserved value 0; if such a packet is received, it + * must be silently dropped. + */ + if (ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) + goto err; + /* * RFC4291 2.7 * Multicast addresses must not be used as source addresses in IPv6 @@ -212,7 +221,7 @@ resubmit: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr)) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && @@ -280,10 +289,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; - struct icmp6hdr *icmp6; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -291,7 +298,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { @@ -303,24 +310,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (nexthdr != IPPROTO_ICMPV6) + if (!ipv6_is_mld(skb, nexthdr, offset)) goto out; - if (!pskb_may_pull(skb, (skb_network_header(skb) + - offset + 1 - skb->data))) - goto out; - - icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); - - switch (icmp6->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - deliver = true; - break; - } - goto out; + deliver = true; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f09..8234c1dcdf7 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0c7c03d50dc..155eccfa776 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,8 +56,6 @@ #include <net/checksum.h> #include <linux/mroute6.h> -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - int __ip6_local_out(struct sk_buff *skb) { int len; @@ -88,7 +86,8 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; - struct rt6_info *rt; + struct in6_addr *nexthop; + int ret; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -121,12 +120,26 @@ static int ip6_finish_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } } - rt = (struct rt6_info *) dst; - neigh = rt->n; - if (neigh) - return dst_neigh_output(dst, neigh, skb); + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } + rcu_read_unlock_bh(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -216,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; + ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -236,9 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, dst->dev, dst_output); } - net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -246,39 +258,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, EXPORT_SYMBOL(ip6_xmit); -/* - * To avoid extra problems ND packets are send through this - * routine. It's code duplication but I really want to avoid - * extra checks since ipv6_build_header is used by TCP (which - * is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr, - int proto, int len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6hdr *hdr; - - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - - skb_reset_network_header(skb); - skb_put(skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(skb); - - *(__be32*)hdr = htonl(0x60000000); - - hdr->payload_len = htons(len); - hdr->nexthdr = proto; - hdr->hop_limit = np->hop_limit; - - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - return 0; -} - static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; @@ -913,8 +892,12 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - n = rt->n; - if (n && !(n->nud_state & NUD_VALID)) { + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock_bh(); + + if (err) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; @@ -1548,9 +1531,7 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl6->flowlabel | - htonl(0x60000000 | ((int)np->cork.tclass << 20)); - + ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a14f28b280f..fff83cbc197 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1030,9 +1030,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); - dsfield = INET_ECN_encapsulate(0, dsfield); - ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8fd154e5f07..96bfb4e4b82 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1312,9 +1359,9 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops)) goto proc_vif_fail; - if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops)) goto proc_cache_fail; #endif @@ -1322,7 +1369,7 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS proc_cache_fail: - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: ip6mr_rules_exit(net); #endif @@ -1333,8 +1380,8 @@ fail: static void __net_exit ip6mr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ip6_mr_cache"); - proc_net_remove(net, "ip6_mr_vif"); + remove_proc_entry("ip6_mr_cache", net->proc_net); + remove_proc_entry("ip6_mr_vif", net->proc_net); #endif ip6mr_rules_exit(net); } @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2018,19 +2074,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2048,14 +2114,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2064,6 +2148,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2099,6 +2184,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2186,6 +2279,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 28dfa5f3801..bfa6cc36ef2 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -376,8 +376,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, - sizeof(struct in6_addr)); + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } @@ -427,12 +426,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { - rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); - if (rv == 0) - break; + rv = !ipv6_addr_equal(&psl->sl_addr[i], source); + if (rv == 0) /* There is an error in the address. */ + goto done; } - if (rv == 0) /* address already there is an error */ - goto done; for (j=psl->sl_count-1; j>=i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; @@ -664,6 +661,10 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; @@ -690,6 +691,10 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < + IPV6_ADDR_SCOPE_LINKLOCAL) + return; + spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; @@ -935,33 +940,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) } /* - * identify MLD packets for MLD filter exceptions - */ -bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) -{ - struct icmp6hdr *pic; - - if (nexthdr != IPPROTO_ICMPV6) - return false; - - if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) - return false; - - pic = icmp6_hdr(skb); - - switch (pic->icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - return true; - default: - break; - } - return false; -} - -/* * check if the interface/address pair is valid */ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, @@ -1340,6 +1318,31 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) return scount; } +static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6hdr *hdr; + + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + + skb_reset_network_header(skb); + skb_put(skb, sizeof(struct ipv6hdr)); + hdr = ipv6_hdr(skb); + + ip6_flow_hdr(hdr, 0, 0); + + hdr->payload_len = htons(len); + hdr->nexthdr = proto; + hdr->hop_limit = inet6_sk(sk)->hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; +} + static struct sk_buff *mld_newpack(struct net_device *dev, int size) { struct net *net = dev_net(dev); @@ -1375,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1418,7 +1421,7 @@ static void mld_sendpack(struct sk_buff *skb) icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); err = 0; if (IS_ERR(dst)) { @@ -1767,7 +1770,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } else saddr = &addr_buf; - ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); + ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); @@ -1786,7 +1789,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); + dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; @@ -2596,10 +2599,10 @@ static int __net_init igmp6_proc_init(struct net *net) int err; err = -ENOMEM; - if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops)) goto out; - if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, - &igmp6_mcf_seq_fops)) + if (!proc_create("mcfilter6", S_IRUGO, net->proc_net, + &igmp6_mcf_seq_fops)) goto out_proc_net_igmp6; err = 0; @@ -2607,14 +2610,14 @@ out: return err; out_proc_net_igmp6: - proc_net_remove(net, "igmp6"); + remove_proc_entry("igmp6", net->proc_net); goto out; } static void __net_exit igmp6_proc_exit(struct net *net) { - proc_net_remove(net, "mcfilter6"); - proc_net_remove(net, "igmp6"); + remove_proc_entry("mcfilter6", net->proc_net); + remove_proc_entry("igmp6", net->proc_net); } #else static inline int igmp6_proc_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6574175795d..76ef4353d51 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,16 +143,12 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -static inline int ndisc_opt_addr_space(struct net_device *dev) +static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { - return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); -} - -static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, - unsigned short addr_type) -{ - int pad = ndisc_addr_option_pad(addr_type); - int space = NDISC_OPT_SPACE(data_len + pad); + int pad = ndisc_addr_option_pad(skb->dev->type); + int data_len = skb->dev->addr_len; + int space = ndisc_opt_addr_space(skb->dev); + u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; @@ -166,7 +162,6 @@ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, opt += data_len; if ((space -= data_len) > 0) memset(opt, 0, space); - return opt + space; } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, @@ -370,91 +365,88 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -static struct sk_buff *ndisc_build_skb(struct net_device *dev, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, - const struct in6_addr *target, - int llinfo) +static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, + int len) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.ndisc_sk; - struct sk_buff *skb; - struct icmp6hdr *hdr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int len; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; + struct sk_buff *skb; int err; - u8 *opt; - - if (!dev->addr_len) - llinfo = 0; - - len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0); - if (llinfo) - len += ndisc_opt_addr_space(dev); skb = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), + hlen + sizeof(struct ipv6hdr) + len + tlen, 1, &err); if (!skb) { - ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", __func__, err); return NULL; } - skb_reserve(skb, hlen); - ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; - skb->transport_header = skb->tail; - skb_put(skb, len); + skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); - hdr = (struct icmp6hdr *)skb_transport_header(skb); - memcpy(hdr, icmp6h, sizeof(*hdr)); + return skb; +} - opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); - if (target) { - *(struct in6_addr *)opt = *target; - opt += sizeof(*target); - } +static void ip6_nd_hdr(struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int hop_limit, int len) +{ + struct ipv6hdr *hdr; - if (llinfo) - ndisc_fill_addr_option(opt, llinfo, dev->dev_addr, - dev->addr_len, dev->type); + skb_push(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + hdr = ipv6_hdr(skb); - hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, - len, 0)); + ip6_flow_hdr(hdr, 0, 0); - return skb; + hdr->payload_len = htons(len); + hdr->nexthdr = IPPROTO_ICMPV6; + hdr->hop_limit = hop_limit; + + hdr->saddr = *saddr; + hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, - struct neighbour *neigh, +static void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h) + const struct in6_addr *saddr) { - struct flowi6 fl6; - struct dst_entry *dst; - struct net *net = dev_net(dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; + struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); - dst = icmp6_dst_alloc(dev, neigh, &fl6); - if (IS_ERR(dst)) { - kfree_skb(skb); - return; + if (!dst) { + struct sock *sk = net->ipv6.ndisc_sk; + struct flowi6 fl6; + + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + dst = icmp6_dst_alloc(skb->dev, &fl6); + if (IS_ERR(dst)) { + kfree_skb(skb); + return; + } + + skb_dst_set(skb, dst); } - skb_dst_set(skb, dst); + icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, + csum_partial(icmp6h, + skb->len, 0)); + + ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -470,36 +462,17 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); } -/* - * Send a Neighbour Discover packet - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) -{ - struct sk_buff *skb; - - skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); - if (!skb) - return; - - ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); -} - static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, - int router, int solicited, int override, int inc_opt) + bool router, bool solicited, bool override, bool inc_opt) { + struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, - }; + struct nd_msg *msg; + int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); @@ -517,13 +490,32 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - icmp6h.icmp6_router = router; - icmp6h.icmp6_solicited = solicited; - icmp6h.icmp6_override = override; + if (!dev->addr_len) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; - __ndisc_send(dev, neigh, daddr, src_addr, - &icmp6h, solicited_addr, - inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, + .icmp6_router = router, + .icmp6_solicited = solicited, + .icmp6_override = override, + }, + .target = *solicited_addr, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, + dev->dev_addr); + + + ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) @@ -551,10 +543,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct sk_buff *skb; struct in6_addr addr_buf; - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, - }; + int inc_opt = dev->addr_len; + int optlen = 0; + struct nd_msg *msg; if (saddr == NULL) { if (ipv6_get_lladdr(dev, &addr_buf, @@ -563,18 +556,37 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - __ndisc_send(dev, neigh, daddr, saddr, - &icmp6h, solicit, - !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0); + if (ipv6_addr_any(saddr)) + inc_opt = 0; + if (inc_opt) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct nd_msg) { + .icmph = { + .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, + }, + .target = *solicit, + }; + + if (inc_opt) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { - struct icmp6hdr icmp6h = { - .icmp6_type = NDISC_ROUTER_SOLICITATION, - }; + struct sk_buff *skb; + struct rs_msg *msg; int send_sllao = dev->addr_len; + int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* @@ -598,9 +610,27 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - __ndisc_send(dev, NULL, daddr, saddr, - &icmp6h, NULL, - send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0); + if (!dev->addr_len) + send_sllao = 0; + if (send_sllao) + optlen += ndisc_opt_addr_space(dev); + + skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!skb) + return; + + msg = (struct rs_msg *)skb_put(skb, sizeof(*msg)); + *msg = (struct rs_msg) { + .icmph = { + .icmp6_type = NDISC_ROUTER_SOLICITATION, + }, + }; + + if (send_sllao) + ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, + dev->dev_addr); + + ndisc_send_skb(skb, daddr, saddr); } @@ -676,6 +706,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) bool inc; int is_router = -1; + if (skb->len < sizeof(struct nd_msg)) { + ND_PRINTK(2, warn, "NS: packet too short\n"); + return; + } + if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return; @@ -685,11 +720,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ - if (dad && - !(daddr->s6_addr32[0] == htonl(0xff020000) && - daddr->s6_addr32[1] == htonl(0x00000000) && - daddr->s6_addr32[2] == htonl(0x00000001) && - daddr->s6_addr [12] == 0xff )) { + if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return; } @@ -780,11 +811,11 @@ static void ndisc_recv_ns(struct sk_buff *skb) } if (is_router < 0) - is_router = !!idev->cnf.forwarding; + is_router = idev->cnf.forwarding; if (dad) { ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, - is_router, 0, (ifp != NULL), 1); + !!is_router, false, (ifp != NULL), true); goto out; } @@ -805,8 +836,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { ndisc_send_na(dev, neigh, saddr, &msg->target, - is_router, - 1, (ifp != NULL && inc), inc); + !!is_router, + true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); } @@ -1350,25 +1381,34 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } +static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, + struct sk_buff *orig_skb, + int rd_len) +{ + u8 *opt = skb_put(skb, rd_len); + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8); +} + void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; - int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; - struct icmp6hdr *icmph; + struct rd_msg *msg; struct in6_addr saddr_buf; - struct in6_addr *addrp; struct rt6_info *rt; struct dst_entry *dst; - struct inet6_dev *idev; struct flowi6 fl6; - u8 *opt; - int hlen, tlen; int rd_len; - int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; bool ret; @@ -1424,7 +1464,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - len += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev); } else read_unlock_bh(&neigh->lock); @@ -1432,80 +1472,40 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } rd_len = min_t(unsigned int, - IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8); + IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, + skb->len + 8); rd_len &= ~0x7; - len += rd_len; - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - buff = sock_alloc_send_skb(sk, - (MAX_HEADER + sizeof(struct ipv6hdr) + - len + hlen + tlen), - 1, &err); - if (buff == NULL) { - ND_PRINTK(0, err, - "Redirect: %s failed to allocate an skb, err=%d\n", - __func__, err); - goto release; - } - - skb_reserve(buff, hlen); - ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, - IPPROTO_ICMPV6, len); - - skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data); - skb_put(buff, len); - icmph = icmp6_hdr(buff); - - memset(icmph, 0, sizeof(struct icmp6hdr)); - icmph->icmp6_type = NDISC_REDIRECT; + optlen += rd_len; - /* - * copy target and destination addresses - */ - - addrp = (struct in6_addr *)(icmph + 1); - *addrp = *target; - addrp++; - *addrp = ipv6_hdr(skb)->daddr; + buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); + if (!buff) + goto release; - opt = (u8*) (addrp + 1); + msg = (struct rd_msg *)skb_put(buff, sizeof(*msg)); + *msg = (struct rd_msg) { + .icmph = { + .icmp6_type = NDISC_REDIRECT, + }, + .target = *target, + .dest = ipv6_hdr(skb)->daddr, + }; /* * include target_address option */ if (ha) - opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha, - dev->addr_len, dev->type); + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha); /* * build redirect option and copy skb over to the new packet. */ - memset(opt, 0, 8); - *(opt++) = ND_OPT_REDIRECT_HDR; - *(opt++) = (rd_len >> 3); - opt += 6; - - memcpy(opt, ipv6_hdr(skb), rd_len - 8); - - icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, - len, IPPROTO_ICMPV6, - csum_partial(icmph, len, 0)); + if (rd_len) + ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, - dst_output); - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - } - - rcu_read_unlock(); + ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: @@ -1522,7 +1522,7 @@ int ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; - if (!pskb_may_pull(skb, skb->len)) + if (skb_linearize(skb)) return 0; msg = (struct nd_msg *)skb_transport_header(skb); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 125a90d6a79..341b54ade72 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1098,7 +1098,7 @@ static int get_info(struct net *net, void __user *user, #endif t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1157,7 +1157,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, } t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1197,7 +1197,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } @@ -1355,7 +1355,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } t = xt_find_table_lock(net, AF_INET6, name); - if (!t || IS_ERR(t)) { + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } @@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t && !IS_ERR(t)) { + if (!IS_ERR_OR_NULL(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 029623dbd41..ed3b427b284 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -126,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); + ip6_flow_hdr(ip6h, tclass, 0); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ip6h->saddr = oip6h->daddr; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 7431121b87d..6134a1ebfb1 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> +#include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -60,8 +61,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 137e245860a..2b6c226f519 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -104,7 +104,6 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret; __be16 frag_off; int protoff; u8 nexthdr; @@ -130,12 +129,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return NF_ACCEPT; } - ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { - nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, - "nf_ct_%s: dropping packet", helper->name); - } - return ret; + return helper->help(skb, protoff, ct, ctinfo); } static unsigned int ipv6_confirm(unsigned int hooknum, @@ -421,54 +415,43 @@ static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_tcp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); + pr_err("nf_conntrack_tcp6: pernet registration failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); + pr_err("nf_conntrack_udp6: pernet registration failed\n"); goto cleanup_tcp6; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_icmpv6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); + pr_err("nf_conntrack_icmp6: pernet registration failed\n"); goto cleanup_udp6; } - ret = nf_conntrack_l3proto_register(net, - &nf_conntrack_l3proto_ipv6); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { - printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); + pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); goto cleanup_icmpv6; } return 0; cleanup_icmpv6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); cleanup_udp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); cleanup_tcp6: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); out: return ret; } static void ipv6_net_exit(struct net *net) { - nf_conntrack_l3proto_unregister(net, - &nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_tcp6); + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); } static struct pernet_operations ipv6_net_ops = { @@ -491,19 +474,52 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) - goto cleanup_pernet; + goto cleanup_sockopt; + ret = nf_register_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " "hook.\n"); - goto cleanup_ipv6; + goto cleanup_pernet; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + goto cleanup_hooks; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); + goto cleanup_tcp6; + } + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); + goto cleanup_udp6; + } + + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); + goto cleanup_icmpv6; } return ret; - cleanup_ipv6: - unregister_pernet_subsys(&ipv6_net_ops); + cleanup_icmpv6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + cleanup_udp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + cleanup_tcp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + cleanup_hooks: + nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: + unregister_pernet_subsys(&ipv6_net_ops); + cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -511,6 +527,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3dacecc9906..54087e96d7b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -97,9 +97,9 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ipv6.frags.high_thresh; - table[1].data = &net->ipv6.frags.low_thresh; - table[2].data = &net->ipv6.frags.timeout; + table[0].data = &net->nf_frag.frags.timeout; + table[1].data = &net->nf_frag.frags.low_thresh; + table[2].data = &net->nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -319,7 +319,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -328,9 +328,8 @@ found: fq->nhoffset = nhoff; fq->q.last_in |= INET_FRAG_FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&nf_frags.lock); + + inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -369,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -398,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -422,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, head->truesize); head->local_df = 1; head->next = NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 745a3204295..bbbe53a99b5 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -295,11 +295,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev) static int __net_init ipv6_proc_init_net(struct net *net) { - if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, - &sockstat6_seq_fops)) + if (!proc_create("sockstat6", S_IRUGO, net->proc_net, + &sockstat6_seq_fops)) return -ENOMEM; - if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); @@ -308,17 +308,17 @@ static int __net_init ipv6_proc_init_net(struct net *net) return 0; proc_dev_snmp6_fail: - proc_net_remove(net, "snmp6"); + remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: - proc_net_remove(net, "sockstat6"); + remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { - proc_net_remove(net, "sockstat6"); - proc_net_remove(net, "dev_snmp6"); - proc_net_remove(net, "snmp6"); + remove_proc_entry("sockstat6", net->proc_net); + remove_proc_entry("dev_snmp6", net->proc_net); + remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 70fa8144999..c65907db8c4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1292,7 +1292,7 @@ static const struct file_operations raw6_seq_fops = { static int __net_init raw6_init_net(struct net *net) { - if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops)) return -ENOMEM; return 0; @@ -1300,7 +1300,7 @@ static int __net_init raw6_init_net(struct net *net) static void __net_exit raw6_exit_net(struct net *net) { - proc_net_remove(net, "raw6"); + remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations raw6_net_ops = { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e5253ec9e0f..3c6a77290c6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -79,20 +79,8 @@ unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, { u32 c; - c = jhash_3words((__force u32)saddr->s6_addr32[0], - (__force u32)saddr->s6_addr32[1], - (__force u32)saddr->s6_addr32[2], - rnd); - - c = jhash_3words((__force u32)saddr->s6_addr32[3], - (__force u32)daddr->s6_addr32[0], - (__force u32)daddr->s6_addr32[1], - c); - - c = jhash_3words((__force u32)daddr->s6_addr32[2], - (__force u32)daddr->s6_addr32[3], - (__force u32)id, - c); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, rnd); return c & (INETFRAGS_HASHSZ - 1); } @@ -327,7 +315,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -341,9 +329,7 @@ found: fq->q.meat == fq->q.len) return ip6_frag_reasm(fq, prev, dev); - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + inet_frag_lru_move(&fq->q); return -1; discard_fq: @@ -406,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split @@ -429,7 +415,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -467,7 +453,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - atomic_sub(sum_truesize, &fq->q.net->mem); + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 363d8b7772e..92826656968 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -145,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, struct neighbour *n; daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + n = __ipv6_neigh_lookup(dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); } -static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) -{ - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); - if (!n) { - n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - } - rt->n = n; - - return 0; -} - static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -300,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - - if (rt->n) - neigh_release(rt->n); + struct dst_entry *from = dst->from; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -312,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) - dst_release(dst->from); + dst->from = NULL; + dst_release(from); if (rt6_has_peer(rt)) { struct inet_peer *peer = rt6_peer_ptr(rt); @@ -354,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } - if (rt->n && rt->n->dev == dev) { - rt->n->dev = loopback_dev; - dev_hold(loopback_dev); - dev_put(dev); - } } } @@ -388,15 +368,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, { unsigned int val = fl6->flowi6_proto; - val ^= (__force u32)fl6->daddr.s6_addr32[0]; - val ^= (__force u32)fl6->daddr.s6_addr32[1]; - val ^= (__force u32)fl6->daddr.s6_addr32[2]; - val ^= (__force u32)fl6->daddr.s6_addr32[3]; - - val ^= (__force u32)fl6->saddr.s6_addr32[0]; - val ^= (__force u32)fl6->saddr.s6_addr32[1]; - val ^= (__force u32)fl6->saddr.s6_addr32[2]; - val ^= (__force u32)fl6->saddr.s6_addr32[3]; + val ^= ipv6_addr_hash(&fl6->daddr); + val ^= ipv6_addr_hash(&fl6->saddr); /* Work only if this not encapsulated */ switch (fl6->flowi6_proto) { @@ -505,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - neigh = rt ? rt->n : NULL; - if (!neigh || (neigh->nud_state & NUD_VALID)) + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; - read_lock_bh(&neigh->lock); - if (!(neigh->nud_state & NUD_VALID) && + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + write_lock(&neigh->lock); + if (neigh->nud_state & NUD_VALID) + goto out; + } + + if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { struct in6_addr mcaddr; struct in6_addr *target; - neigh->updated = jiffies; - read_unlock_bh(&neigh->lock); + if (neigh) { + neigh->updated = jiffies; + write_unlock(&neigh->lock); + } - target = (struct in6_addr *)&neigh->primary_key; + target = (struct in6_addr *)&rt->rt6i_gateway; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { - read_unlock_bh(&neigh->lock); +out: + write_unlock(&neigh->lock); } + rcu_read_unlock_bh(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -549,20 +532,24 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; bool ret = false; - neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - ret = true; - else if (neigh) { - read_lock_bh(&neigh->lock); + return true; + + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = true; #endif - read_unlock_bh(&neigh->lock); + read_unlock(&neigh->lock); } + rcu_read_unlock_bh(); + return ret; } @@ -838,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - int attempts = !in_softirq(); - if (!(rt->rt6i_flags & RTF_GATEWAY)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) @@ -855,32 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt->rt6i_src.plen = 128; } #endif - - retry: - if (rt6_bind_neighbour(rt, rt->dst.dev)) { - struct net *net = dev_net(rt->dst.dev); - int saved_rt_min_interval = - net->ipv6.sysctl.ip6_rt_gc_min_interval; - int saved_rt_elasticity = - net->ipv6.sysctl.ip6_rt_gc_elasticity; - - if (attempts-- > 0) { - net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; - net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - - ip6_dst_gc(&net->ipv6.ip6_dst_ops); - - net->ipv6.sysctl.ip6_rt_gc_elasticity = - saved_rt_elasticity; - net->ipv6.sysctl.ip6_rt_gc_min_interval = - saved_rt_min_interval; - goto retry; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - dst_free(&rt->dst); - return NULL; - } } return rt; @@ -891,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, { struct rt6_info *rt = ip6_rt_copy(ort, daddr); - if (rt) { + if (rt) rt->rt6i_flags |= RTF_CACHE; - rt->n = neigh_clone(ort->n); - } return rt; } @@ -928,7 +885,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL))) + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -994,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; @@ -1054,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; - rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1159,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1187,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1247,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - struct neighbour *neigh, struct flowi6 *fl6) { struct dst_entry *dst; @@ -1265,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - if (neigh) - neigh_hold(neigh); - else { - neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); - if (IS_ERR(neigh)) { - in6_dev_put(idev); - dst_free(&rt->dst); - return ERR_CAST(neigh); - } - } - rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1587,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg) } else rt->rt6i_prefsrc.plen = 0; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - err = rt6_bind_neighbour(rt, dev); - if (err) - goto out; - } - rt->rt6i_flags = cfg->fc_flags; install_route: @@ -1705,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; - const struct in6_addr *target; struct ndisc_options ndopts; - const struct in6_addr *dest; - struct neighbour *old_neigh; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct icmp6hdr *icmph; + struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; + msg = (struct rd_msg *)icmp6_hdr(skb); - if (ipv6_addr_is_multicast(dest)) { + if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; - if (ipv6_addr_equal(dest, target)) { + if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; - } else if (ipv6_addr_type(target) != + } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; @@ -1752,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -1779,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu */ dst_confirm(&rt->dst); - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; - /* Duplicate redirect: silently ignore. */ - old_neigh = rt->n; - if (neigh == old_neigh) - goto out; - /* * We have finally decided to accept it. */ @@ -1799,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, dest); + nrt = ip6_rt_copy(rt, &msg->dest); if (!nrt) goto out; @@ -1808,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; - netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; - netevent.new_neigh = neigh; - netevent.daddr = dest; + netevent.daddr = &msg->dest; + netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -1859,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) rt6_set_from(rt, ort); - else - rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2123,7 +2046,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - int err; if (!rt) { net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); @@ -2142,11 +2064,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - err = rt6_bind_neighbour(rt, rt->dst.dev); - if (err) { - dst_free(&rt->dst); - return ERR_PTR(err); - } rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; @@ -2492,7 +2409,6 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; - struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2605,9 +2521,8 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - n = rt->n; - if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2802,7 +2717,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2811,9 +2725,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - n = rt->n; - if (n) { - seq_printf(m, "%pi6", n->primary_key); + if (rt->rt6i_flags & RTF_GATEWAY) { + seq_printf(m, "%pi6", &rt->rt6i_gateway); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -3080,8 +2993,8 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); + proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -3089,8 +3002,8 @@ static int __net_init ip6_route_net_init_late(struct net *net) static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); + remove_proc_entry("ipv6_route", net->proc_net); + remove_proc_entry("rt6_stats", net->proc_net); #endif } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cfba99b2c2a..02f96dcbcf0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -72,6 +72,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; @@ -590,17 +592,21 @@ out: return err; } +static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, + const struct in6_addr *v6addr) +{ + __be32 v4embed = 0; + if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) + return true; + return false; +} + static int ipip6_rcv(struct sk_buff *skb) { - const struct iphdr *iph; + const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -613,10 +619,19 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; + } + } else { + if (is_spoofed_6rd(tunnel, iph->saddr, + &ipv6_hdr(skb)->saddr) || + is_spoofed_6rd(tunnel, iph->daddr, + &ipv6_hdr(skb)->daddr)) { + tunnel->dev->stats.rx_errors++; + goto out; + } } __skb_tunnel_rx(skb, tunnel->dev); @@ -650,14 +665,12 @@ out: } /* - * Returns the embedded IPv4 address if the IPv6 address - * comes from 6rd / 6to4 (RFC 3056) addr space. + * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function + * stores the embedded IPv4 address in v4dst and returns true. */ -static inline -__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) +static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, + __be32 *v4dst) { - __be32 dst = 0; - #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { @@ -676,14 +689,24 @@ __be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); - dst = tunnel->ip6rd.relay_prefix | htonl(d); + *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); + return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ - memcpy(&dst, &v6dst->s6_addr16[1], 4); + memcpy(v4dst, &v6dst->s6_addr16[1], 4); + return true; } #endif + return false; +} + +static inline __be32 try_6rd(struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + __be32 dst = 0; + check_6rd(tunnel, v6dst, &dst); return dst; } @@ -744,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6rd(&iph6->daddr, tunnel); + dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 40161977f7c..8a0848b60b3 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -179,7 +179,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; ret = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f43537197e..9b6460055df 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, + struct tcp_md5sig_key *key, int rst, u8 tclass) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, struct dst_entry *dst; __be32 *topt; - if (ts) + if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG if (key) @@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, topt = (__be32 *)(t1 + 1); - if (ts) { + if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt++ = htonl(ts); + *topt++ = htonl(tsval); + *topt++ = htonl(tsecr); } #ifdef CONFIG_TCP_MD5SIG @@ -835,7 +836,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -859,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -870,10 +872,11 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -883,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tw->tw_tclass); @@ -892,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -1030,7 +1035,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->rmt_addr = ipv6_hdr(skb)->saddr; treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb); + TCP_ECN_create_request(req, skb, sock_net(sk)); treq->iif = sk->sk_bound_dev_if; @@ -1167,7 +1172,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1247,7 +1252,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1460,7 +1465,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1602,6 +1607,7 @@ do_time_wait: struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fb083295ff0..599e1ba6d1c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/inet6_hashtables.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -214,8 +216,18 @@ begin: if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -284,6 +297,17 @@ begin: if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -752,40 +776,6 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, return 0; } -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c..cf05cf073c5 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad..9bf6a74a71d 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_unclone(skb, GFP_ATOMIC); + if (err) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c9844135c9c..4ef7bdb6544 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -110,7 +110,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ - xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; @@ -321,7 +320,51 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static struct ctl_table_header *sysctl_hdr; +static int __net_init xfrm6_net_init(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = xfrm6_policy_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); + if (!table) + goto err_alloc; + + table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (!hdr) + goto err_reg; + + net->ipv6.sysctl.xfrm6_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + struct ctl_table *table; + + if (net->ipv6.sysctl.xfrm6_hdr == NULL) + return; + + table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations xfrm6_net_ops = { + .init = xfrm6_net_init, + .exit = xfrm6_net_exit, +}; #endif int __init xfrm6_init(void) @@ -340,8 +383,7 @@ int __init xfrm6_init(void) goto out_policy; #ifdef CONFIG_SYSCTL - sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", - xfrm6_policy_table); + register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; @@ -353,8 +395,7 @@ out_policy: void xfrm6_fini(void) { #ifdef CONFIG_SYSCTL - if (sysctl_hdr) - unregister_net_sysctl_table(sysctl_hdr); + unregister_pernet_subsys(&xfrm6_net_ops); #endif xfrm6_policy_fini(); xfrm6_state_fini(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ee5a7065aac..6cc48012b73 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -72,7 +72,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad { unsigned int h; - h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); + h = ipv6_addr_hash((const struct in6_addr *)addr); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; @@ -94,7 +94,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const hlist_for_each_entry_rcu(x6spi, pos, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) + if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } @@ -211,7 +211,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { + if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index b833677d83d..d07e3a62644 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2567,8 +2567,7 @@ bed: err); /* If watchdog is still activated, kill it! */ - if(timer_pending(&(self->watchdog))) - del_timer(&(self->watchdog)); + del_timer(&(self->watchdog)); IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 2bb2beb6a37..3c83a1e5ab0 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -214,8 +214,7 @@ irnet_get_discovery_log(irnet_socket * ap) * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1 */ static inline int -irnet_read_discovery_log(irnet_socket * ap, - char * event) +irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size) { int done_event = 0; @@ -237,12 +236,13 @@ irnet_read_discovery_log(irnet_socket * ap, if(ap->disco_index < ap->disco_number) { /* Write an event */ - sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n", - ap->discoveries[ap->disco_index].daddr, - ap->discoveries[ap->disco_index].info, - ap->discoveries[ap->disco_index].saddr, - ap->discoveries[ap->disco_index].hints[0], - ap->discoveries[ap->disco_index].hints[1]); + snprintf(event, buf_size, + "Found %08x (%s) behind %08x {hints %02X-%02X}\n", + ap->discoveries[ap->disco_index].daddr, + ap->discoveries[ap->disco_index].info, + ap->discoveries[ap->disco_index].saddr, + ap->discoveries[ap->disco_index].hints[0], + ap->discoveries[ap->disco_index].hints[1]); DEBUG(CTRL_INFO, "Writing discovery %d : %s\n", ap->disco_index, ap->discoveries[ap->disco_index].info); @@ -282,27 +282,24 @@ irnet_ctrl_read(irnet_socket * ap, size_t count) { DECLARE_WAITQUEUE(wait, current); - char event[64]; /* Max event is 61 char */ + char event[75]; ssize_t ret = 0; DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count); - /* Check if we can write an event out in one go */ - DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n"); - #ifdef INITIAL_DISCOVERY /* Check if we have read the log */ - if(irnet_read_discovery_log(ap, event)) + if (irnet_read_discovery_log(ap, event, sizeof(event))) { - /* We have an event !!! Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } #endif /* INITIAL_DISCOVERY */ @@ -339,79 +336,81 @@ irnet_ctrl_read(irnet_socket * ap, switch(irnet_events.log[ap->event_index].event) { case IRNET_DISCOVER: - sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_EXPIRE: - sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); + snprintf(event, sizeof(event), + "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr, + irnet_events.log[ap->event_index].hints.byte[0], + irnet_events.log[ap->event_index].hints.byte[1]); break; case IRNET_CONNECT_TO: - sprintf(event, "Connected to %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_CONNECT_FROM: - sprintf(event, "Connection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_REQUEST_FROM: - sprintf(event, "Request from %08x (%s) behind %08x\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr); + snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].saddr); break; case IRNET_NOANSWER_FROM: - sprintf(event, "No-answer from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_BLOCKED_LINK: - sprintf(event, "Blocked link with %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_FROM: - sprintf(event, "Disconnection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); + snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name, + irnet_events.log[ap->event_index].unit); break; case IRNET_DISCONNECT_TO: - sprintf(event, "Disconnected to %08x (%s)\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name); + snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n", + irnet_events.log[ap->event_index].daddr, + irnet_events.log[ap->event_index].name); break; default: - sprintf(event, "Bug\n"); + snprintf(event, sizeof(event), "Bug\n"); } /* Increment our event index */ ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS; DEBUG(CTRL_INFO, "Event is :%s", event); - /* Copy it to the user */ - if(copy_to_user(buf, event, strlen(event))) + count = min(strlen(event), count); + if (copy_to_user(buf, event, count)) { DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); return -EFAULT; } DEXIT(CTRL_TRACE, "\n"); - return strlen(event); + return count; } /*------------------------------------------------------------------*/ diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b426a64654..9ef79851f29 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -203,7 +203,6 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, } if (*skb2 != NULL) { if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_orphan(*skb2); skb_set_owner_r(*skb2, sk); skb_queue_tail(&sk->sk_receive_queue, *skb2); sk->sk_data_ready(sk, (*skb2)->len); @@ -762,7 +761,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, } /* identity & sensitivity */ - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -816,18 +815,21 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, sa->sadb_sa_auth = 0; if (x->aalg) { struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); - sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_auth = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } sa->sadb_sa_encrypt = 0; BUG_ON(x->ealg && x->calg); if (x->ealg) { struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } /* KAME compatible: sadb_sa_encrypt is overloaded with calg id */ if (x->calg) { struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0); - sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0; + sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? + a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; @@ -909,8 +911,8 @@ static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, if (!addr->sadb_address_prefixlen) BUG(); - if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, - x->props.family)) { + if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, + x->props.family)) { addr = (struct sadb_address*) skb_put(skb, sizeof(struct sadb_address)+sockaddr_size); addr->sadb_address_len = @@ -1138,7 +1140,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1160,7 +1162,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1172,7 +1174,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); - if (!a) { + if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } @@ -1321,7 +1323,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); - if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } @@ -1578,13 +1580,13 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct sadb_msg *hdr; int len, auth_len, enc_len, i; - auth_len = xfrm_count_auth_supported(); + auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } - enc_len = xfrm_count_enc_supported(); + enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); @@ -1615,6 +1617,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg->available) *ap++ = aalg->desc; } @@ -1634,6 +1638,8 @@ static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; + if (!ealg->pfkey_supported) + continue; if (ealg->available) *ap++ = ealg->desc; } @@ -2825,6 +2831,8 @@ static int count_ah_combs(const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2840,6 +2848,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2848,6 +2859,9 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg) break; + if (!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } @@ -2871,6 +2885,9 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!aalg) break; + if (!aalg->pfkey_supported) + continue; + if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -2903,6 +2920,9 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) if (!ealg) break; + if (!ealg->pfkey_supported) + continue; + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; @@ -2911,6 +2931,8 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; + if (!aalg->pfkey_supported) + continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb)); @@ -3718,7 +3740,7 @@ static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "pfkey", 0, &pfkey_proc_ops); + e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops); if (e == NULL) return -ENOMEM; @@ -3727,7 +3749,7 @@ static int __net_init pfkey_init_proc(struct net *net) static void __net_exit pfkey_exit_proc(struct net *net) { - proc_net_remove(net, "pfkey"); + remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 2ac884d0e89..dcfd64e83ab 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -101,6 +101,7 @@ struct l2tp_skb_cb { static atomic_t l2tp_tunnel_count; static atomic_t l2tp_session_count; +static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; @@ -122,7 +123,6 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net) return net_generic(net, l2tp_net_id); } - /* Tunnel reference counts. Incremented per session that is added to * the tunnel. */ @@ -1271,6 +1271,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; tunnel = sk->sk_user_data; if (tunnel == NULL) @@ -1278,9 +1279,8 @@ static void l2tp_tunnel_destruct(struct sock *sk) l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Close all sessions */ - l2tp_tunnel_closeall(tunnel); + /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ @@ -1292,17 +1292,23 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ - tunnel->sock = NULL; sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + tunnel->sock = NULL; - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + atomic_dec(&l2tp_tunnel_count); - /* We're finished with the socket */ + l2tp_tunnel_closeall(tunnel); l2tp_tunnel_dec_refcount(tunnel); + /* Call the original destructor */ + if (sk->sk_destruct) + (*sk->sk_destruct)(sk); end: return; } @@ -1376,48 +1382,77 @@ again: */ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - BUG_ON(atomic_read(&tunnel->ref_count) != 0); BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - - /* Remove from tunnel list */ - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); kfree_rcu(tunnel, rcu); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +} - atomic_dec(&l2tp_tunnel_count); +/* Workqueue tunnel deletion function */ +static void l2tp_tunnel_del_work(struct work_struct *work) +{ + struct l2tp_tunnel *tunnel = NULL; + struct socket *sock = NULL; + struct sock *sk = NULL; + + tunnel = container_of(work, struct l2tp_tunnel, del_work); + sk = l2tp_tunnel_sock_lookup(tunnel); + if (!sk) + return; + + sock = sk->sk_socket; + BUG_ON(!sock); + + /* If the tunnel socket was created directly by the kernel, use the + * sk_* API to release the socket now. Otherwise go through the + * inet_* layer to shut the socket down, and let userspace close it. + * In either case the tunnel resources are freed in the socket + * destructor when the tunnel socket goes away. + */ + if (sock->file == NULL) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); + } else { + inet_shutdown(sock, 2); + } + + l2tp_tunnel_sock_put(sk); } /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. + * + * Since we don't want these sockets to keep a namespace alive by + * themselves, we drop the socket's namespace refcount after creation. + * These sockets are freed when the namespace exits using the pernet + * exit hook. */ -static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) +static int l2tp_tunnel_sock_create(struct net *net, + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; - struct sockaddr_in udp_addr; + struct socket *sock = NULL; + struct sockaddr_in udp_addr = {0}; + struct sockaddr_l2tpip ip_addr = {0}; #if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr; - struct sockaddr_l2tpip6 ip6_addr; + struct sockaddr_in6 udp6_addr = {0}; + struct sockaddr_l2tpip6 ip6_addr = {0}; #endif - struct sockaddr_l2tpip ip_addr; - struct socket *sock = NULL; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -1439,13 +1474,12 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&udp_addr, 0, sizeof(udp_addr)); udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = htons(cfg->local_udp_port); @@ -1472,14 +1506,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET6, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip6_addr, 0, sizeof(ip6_addr)); ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1501,14 +1534,13 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } else #endif { - err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, - sockp); + err = sock_create_kern(AF_INET, SOCK_DGRAM, + IPPROTO_L2TP, &sock); if (err < 0) goto out; - sock = *sockp; + sk_change_net(sock->sk, net); - memset(&ip_addr, 0, sizeof(ip_addr)); ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1532,8 +1564,10 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t } out: + *sockp = sock; if ((err < 0) && sock) { - sock_release(sock); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); *sockp = NULL; } @@ -1556,15 +1590,23 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 * kernel socket. */ if (fd < 0) { - err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock); + err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id, + cfg, &sock); if (err < 0) goto err; } else { - err = -EBADF; sock = sockfd_lookup(fd, &err); if (!sock) { - pr_err("tunl %hu: sockfd_lookup(fd=%d) returned %d\n", + pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n", tunnel_id, fd, err); + err = -EBADF; + goto err; + } + + /* Reject namespace mismatches */ + if (!net_eq(sock_net(sock->sk), net)) { + pr_err("tunl %u: netns mismatch\n", tunnel_id); + err = -EINVAL; goto err; } } @@ -1651,6 +1693,9 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 sk->sk_allocation = GFP_ATOMIC; + /* Init delete workqueue struct */ + INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); + /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); atomic_inc(&l2tp_tunnel_count); @@ -1682,33 +1727,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - int err = -EBADF; - struct socket *sock = NULL; - struct sock *sk = NULL; - - sk = l2tp_tunnel_sock_lookup(tunnel); - if (!sk) - goto out; - - sock = sk->sk_socket; - BUG_ON(!sock); - - /* Force the tunnel socket to close. This will eventually - * cause the tunnel to be deleted via the normal socket close - * mechanisms when userspace closes the tunnel socket. - */ - err = inet_shutdown(sock, 2); - - /* If the tunnel's socket was created by the kernel, - * close the socket here since the socket was not - * created by userspace. - */ - if (sock->file == NULL) - err = inet_release(sock); - - l2tp_tunnel_sock_put(sk); -out: - return err; + return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1892,8 +1911,21 @@ static __net_init int l2tp_init_net(struct net *net) return 0; } +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + (void)l2tp_tunnel_delete(tunnel); + } + rcu_read_unlock_bh(); +} + static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, + .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; @@ -1906,6 +1938,13 @@ static int __init l2tp_init(void) if (rc) goto out; + l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + if (!l2tp_wq) { + pr_err("alloc_workqueue failed\n"); + rc = -ENOMEM; + goto out; + } + pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION); out: @@ -1915,6 +1954,10 @@ out: static void __exit l2tp_exit(void) { unregister_pernet_device(&l2tp_net_ops); + if (l2tp_wq) { + destroy_workqueue(l2tp_wq); + l2tp_wq = NULL; + } } module_init(l2tp_init); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index e62204cad4f..8eb8f1d47f3 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -191,6 +191,8 @@ struct l2tp_tunnel { int fd; /* Parent fd, if tunnel socket * was created by userspace */ + struct work_struct del_work; + uint8_t priv[0]; /* private data */ }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 61d8b75d268..f7ac8f42fee 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -115,6 +115,7 @@ static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, in */ static int l2tp_ip_recv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; u32 session_id; u32 tunnel_id; @@ -142,7 +143,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(&init_net, NULL, session_id); + session = l2tp_session_find(net, NULL, session_id); if (session == NULL) goto discard; @@ -173,14 +174,14 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel != NULL) sk = tunnel->sock; else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); read_unlock_bh(&l2tp_ip_lock); } @@ -239,6 +240,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + struct net *net = sock_net(sk); int ret; int chk_addr_ret; @@ -251,7 +253,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) ret = -EADDRINUSE; read_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + sk->sk_bound_dev_if, addr->l2tp_conn_id)) goto out_in_use; read_unlock_bh(&l2tp_ip_lock); @@ -260,7 +263,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr); + chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -369,7 +372,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return -1; } @@ -605,6 +608,7 @@ static struct inet_protosw l2tp_ip_protosw = { static struct net_protocol l2tp_ip_protocol __read_mostly = { .handler = l2tp_ip_recv, + .netns_ok = 1, }; static int __init l2tp_ip_init(void) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bbba3a19e94..c1bab22db85 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -37,6 +37,7 @@ static struct genl_family l2tp_nl_family = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .netnsok = true, }; /* Accessed under genl lock */ diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 716605c241f..3f4e3afc191 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1783,7 +1783,8 @@ static __net_init int pppol2tp_init_net(struct net *net) struct proc_dir_entry *pde; int err = 0; - pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops); + pde = proc_create("pppol2tp", S_IRUGO, net->proc_net, + &pppol2tp_proc_fops); if (!pde) { err = -ENOMEM; goto out; @@ -1795,7 +1796,7 @@ out: static __net_exit void pppol2tp_exit_net(struct net *net) { - proc_net_remove(net, "pppol2tp"); + remove_proc_entry("pppol2tp", net->proc_net); } static struct pernet_operations pppol2tp_net_ops = { diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index b4ecf267a34..0ecf947ad37 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG Do not select this option. +config MAC80211_MESH_PS_DEBUG + bool "Verbose mesh powersave debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very verbose mesh + powersave debugging messages (when mac80211 is taking part in a + mesh network). + + Do not select this option. + config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4911202334d..9d7d840aac6 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_pathtbl.o \ mesh_plink.o \ mesh_hwmp.o \ - mesh_sync.o + mesh_sync.o \ + mesh_ps.o mac80211-$(CONFIG_PM) += pm.o diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 808338a1bce..31bf2586fb8 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -83,8 +83,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) sdata_info(sta->sdata, - "HW problem - can not stop rx aggregation for tid %d\n", - tid); + "HW problem - can not stop rx aggregation for %pM tid %d\n", + sta->sta.addr, tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -159,7 +159,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } rcu_read_unlock(); - ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -247,7 +248,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, + "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", + sta->sta.addr, tid); goto end_no_lock; } @@ -317,7 +320,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", + sta->sta.addr, tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index eb9df22418f..13b7683de5a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -149,16 +149,133 @@ void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } +static inline int ieee80211_ac_from_tid(int tid) +{ + return ieee802_1d_to_ac[tid & 7]; +} + +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. + */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) +{ + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + + if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &sdata->local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + struct ieee80211_local *local = sdata->local; + int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + unsigned long flags; + + ieee80211_stop_queue_agg(sdata, tid); + + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) +{ + ieee80211_wake_queue_agg(sdata, tid); +} + +static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_held(&sta->lock); + + tid_tx = rcu_dereference_protected_tid_tx(sta, tid); + + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ + + ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); + + /* future packets must not find the tid_tx struct any more */ + ieee80211_assign_tid_tx(sta, tid, NULL); + + ieee80211_agg_splice_finish(sta->sdata, tid); + + kfree_rcu(tid_tx, rcu_head); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; + enum ieee80211_ampdu_mlme_action action; int ret; lockdep_assert_held(&sta->ampdu_mlme.mtx); + switch (reason) { + case AGG_STOP_DECLINED: + case AGG_STOP_LOCAL_REQUEST: + case AGG_STOP_PEER_REQUEST: + action = IEEE80211_AMPDU_TX_STOP_CONT; + break; + case AGG_STOP_DESTROY_STA: + action = IEEE80211_AMPDU_TX_STOP_FLUSH; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + spin_lock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -167,10 +284,19 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } - /* if we're already stopping ignore any new requests to stop */ + /* + * if we're already stopping ignore any new requests to stop + * unless we're destroying it in which case notify the driver + */ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { spin_unlock_bh(&sta->lock); - return -EALREADY; + if (reason != AGG_STOP_DESTROY_STA) + return -EALREADY; + ret = drv_ampdu_action(local, sta->sdata, + IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, + &sta->sta, tid, NULL, 0); + WARN_ON_ONCE(ret); + return 0; } if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { @@ -212,11 +338,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ synchronize_net(); - tid_tx->stop_initiator = initiator; - tid_tx->tx_stop = tx; + tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? + WLAN_BACK_RECIPIENT : + WLAN_BACK_INITIATOR; + tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; - ret = drv_ampdu_action(local, sta->sdata, - IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, sta->sdata, action, &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ @@ -227,7 +354,17 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ } - return ret; + /* + * In the case of AGG_STOP_DESTROY_STA, the driver won't + * necessarily call ieee80211_stop_tx_ba_cb(), so this may + * seem like we can leave the tid_tx data pending forever. + * This is true, in a way, but "forever" is only until the + * station struct is actually destroyed. In the meantime, + * leaving it around ensures that we don't transmit packets + * to the driver on this TID which might confuse it. + */ + + return 0; } /* @@ -253,91 +390,18 @@ static void sta_addba_resp_timer_expired(unsigned long data) test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); ht_dbg(sta->sdata, - "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n", + sta->sta.addr, tid); return; } - ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on %pM tid %d\n", + sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); } -static inline int ieee80211_ac_from_tid(int tid) -{ - return ieee802_1d_to_ac[tid & 7]; -} - -/* - * When multiple aggregation sessions on multiple stations - * are being created/destroyed simultaneously, we need to - * refcount the global queue stop caused by that in order - * to not get into a situation where one of the aggregation - * setup or teardown re-enables queues before the other is - * ready to handle that. - * - * These two functions take care of this issue by keeping - * a global "agg_queue_stop" refcount. - */ -static void __acquires(agg_queue) -ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) - ieee80211_stop_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __acquire(agg_queue); -} - -static void __releases(agg_queue) -ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) -{ - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - - if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) - ieee80211_wake_queue_by_reason( - &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - __release(agg_queue); -} - -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - struct ieee80211_local *local = sdata->local; - int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; - unsigned long flags; - - ieee80211_stop_queue_agg(sdata, tid); - - if (WARN(!tid_tx, - "TID %d gone but expected when splicing aggregates from the pending queue\n", - tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_sub_if_data *sdata, u16 tid) -{ - ieee80211_wake_queue_agg(sdata, tid); -} - void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -369,7 +433,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) &sta->sta, tid, &start_seq_num, 0); if (ret) { ht_dbg(sdata, - "BA request denied - HW unavailable for tid %d\n", tid); + "BA request denied - HW unavailable for %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -382,7 +447,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -429,7 +495,8 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", + sta->sta.addr, (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -465,7 +532,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sdata, - "BA sessions blocked - Denying BA session request\n"); + "BA sessions blocked - Denying BA session request %pM tid %d\n", + sta->sta.addr, tid); return -EINVAL; } @@ -506,8 +574,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, - "BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; } @@ -516,8 +584,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { ht_dbg(sdata, - "BA request denied - session is not idle on tid %u\n", - tid); + "BA request denied - session is not idle on %pM tid %u\n", + sta->sta.addr, tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -572,7 +640,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", + sta->sta.addr, tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -660,14 +729,13 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx) + enum ieee80211_agg_stop_reason reason) { int ret; mutex_lock(&sta->ampdu_mlme.mtx); - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx); + ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); mutex_unlock(&sta->ampdu_mlme.mtx); @@ -743,7 +811,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, + "unexpected callback to A-MPDU stop for %pM tid %d\n", + sta->sta.addr, tid); goto unlock_sta; } @@ -751,24 +821,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - /* - * When we get here, the TX path will not be lockless any more wrt. - * aggregation, since the OPERATIONAL bit has long been cleared. - * Thus it will block on getting the lock, if it occurs. So if we - * stop the queue now, we will not get any more packets, and any - * that might be being processed will wait for us here, thereby - * guaranteeing that no packets go to the tid_tx pending queue any - * more. - */ - - ieee80211_agg_splice_packets(sta->sdata, tid_tx, tid); - - /* future packets must not find the tid_tx struct any more */ - ieee80211_assign_tid_tx(sta, tid, NULL); - - ieee80211_agg_splice_finish(sta->sdata, tid); - - kfree_rcu(tid_tx, rcu_head); + ieee80211_remove_tid_tx(sta, tid); unlock_sta: spin_unlock_bh(&sta->lock); @@ -819,13 +872,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", + sta->sta.addr, tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for %pM tid %d\n", + sta->sta.addr, tid); /* * addba_resp_timer may have fired before we got here, and @@ -835,8 +890,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sta->sdata, - "got addBA resp for tid %d but we already gave up\n", - tid); + "got addBA resp for %pM tid %d but we already gave up\n", + sta->sta.addr, tid); goto out; } @@ -868,8 +923,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, - false); + ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0479c64aa83..09d96a8f6c2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -492,7 +492,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | STATION_INFO_PLID | - STATION_INFO_PLINK_STATE; + STATION_INFO_PLINK_STATE | + STATION_INFO_LOCAL_PM | + STATION_INFO_PEER_PM | + STATION_INFO_NONPEER_PM; sinfo->llid = le16_to_cpu(sta->llid); sinfo->plid = le16_to_cpu(sta->plid); @@ -501,6 +504,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= STATION_INFO_T_OFFSET; sinfo->t_offset = sta->t_offset; } + sinfo->local_pm = sta->local_pm; + sinfo->peer_pm = sta->peer_pm; + sinfo->nonpeer_pm = sta->nonpeer_pm; #endif } @@ -520,6 +526,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -531,6 +538,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -919,11 +928,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, /* TODO: make hostapd tell us what it wants */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); if (err) return err; + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -940,6 +951,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->vif.bss_conf.dtim_period = params->dtim_period; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1020,8 +1032,15 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) kfree_rcu(old_probe_resp, rcu_head); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush(local, vlan); - sta_info_flush(local, sdata); + sta_info_flush_defer(vlan); + sta_info_flush_defer(sdata); + rcu_barrier(); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + sta_info_flush_cleanup(vlan); + sta_info_flush_cleanup(sdata); + + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); drv_stop_ap(sdata->local, sdata); @@ -1030,6 +1049,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); return 0; @@ -1079,6 +1099,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1096,52 +1168,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. - */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1187,10 +1227,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.aid = params->aid; /* - * FIXME: updating the following information is racy when this - * function is called from ieee80211_change_station(). - * However, all this information should be static so - * maybe we should just reject attemps to change it. + * Some of the following updates would be racy if called on an + * existing station, via ieee80211_change_station(). However, + * all such changes are rejected by cfg80211 except for updates + * changing the supported rates on an existing but not yet used + * TDLS peer. */ if (params->listen_interval >= 0) @@ -1211,36 +1252,62 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - params->ht_capa, - &sta->sta.ht_cap); + params->ht_capa, sta); if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - params->vht_capa, - &sta->sta.vht_cap); + params->vht_capa, sta); if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + u32 changed = 0; + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { switch (params->plink_state) { - case NL80211_PLINK_LISTEN: case NL80211_PLINK_ESTAB: + if (sta->plink_state != NL80211_PLINK_ESTAB) + changed = mesh_plink_inc_estab_count( + sdata); + sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + sdata->u.mesh.mshcfg.power_mode); + break; + case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + case NL80211_PLINK_HOLDING: + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count( + sdata); sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + changed |= + ieee80211_mps_local_status_update(sdata); break; default: /* nothing */ break; } - else + } else { switch (params->plink_action) { case PLINK_ACTION_OPEN: - mesh_plink_open(sta); + changed |= mesh_plink_open(sta); break; case PLINK_ACTION_BLOCK: - mesh_plink_block(sta); + changed |= mesh_plink_block(sta); break; } + } + + if (params->local_pm) + changed |= + ieee80211_mps_set_sta_local_pm(sta, + params->local_pm); + ieee80211_bss_info_change_notify(sdata, changed); #endif } @@ -1275,6 +1342,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -1311,7 +1382,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1319,7 +1389,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (mac) return sta_info_destroy_addr_bss(sdata, mac); - sta_info_flush(local, sdata); + sta_info_flush(sdata); return 0; } @@ -1342,9 +1412,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -ENOENT; } - /* in station mode, supported rates are only valid with TDLS */ + /* in station mode, some updates are only valid with TDLS */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - params->supported_rates && + (params->supported_rates || params->ht_capa || params->vht_capa || + params->sta_modify_mask || + (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { mutex_unlock(&local->sta_mtx); return -EINVAL; @@ -1428,13 +1500,13 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } - err = mesh_path_add(dst, sdata); + err = mesh_path_add(sdata, dst); if (err) { rcu_read_unlock(); return err; } - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENXIO; @@ -1446,12 +1518,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) - return mesh_path_del(dst, sdata); + return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; @@ -1475,7 +1547,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, return -ENOENT; } - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1539,7 +1611,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup(dst, sdata); + mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1560,7 +1632,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); - mpath = mesh_path_lookup_by_idx(idx, sdata); + mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1625,6 +1697,9 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.beacon_int = setup->beacon_interval; + sdata->vif.bss_conf.dtim_period = setup->dtim_period; + return 0; } @@ -1723,6 +1798,14 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { + conf->power_mode = nconf->power_mode; + ieee80211_mps_local_status_update(sdata); + } + if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) + conf->dot11MeshAwakeWindowDuration = + nconf->dot11MeshAwakeWindowDuration; + ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } @@ -1748,9 +1831,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, if (err) return err; - ieee80211_start_mesh(sdata); - - return 0; + return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) @@ -2208,7 +2289,8 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -2314,7 +2396,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, INIT_LIST_HEAD(&roc->dependents); /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || local->scanning) + if (!list_empty(&local->roc_list) || + local->scanning || local->radar_detect_enabled) goto out_check_combine; /* if not HW assist, just queue & schedule work */ @@ -2564,6 +2647,37 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_cancel_roc(local, cookie, false); } +static int ieee80211_start_radar_detection(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + unsigned long timeout; + int err; + + if (!list_empty(&local->roc_list) || local->scanning) + return -EBUSY; + + /* whatever, but channel contexts should not complain about that one */ + sdata->smps_mode = IEEE80211_SMPS_OFF; + sdata->needed_rx_chains = local->rx_chains; + sdata->radar_required = true; + + mutex_lock(&local->iflist_mtx); + err = ieee80211_vif_use_channel(sdata, chandef, + IEEE80211_CHANCTX_SHARED); + mutex_unlock(&local->iflist_mtx); + if (err) + return err; + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + ieee80211_queue_delayed_work(&sdata->local->hw, + &sdata->dfs_cac_timer_work, timeout); + + return 0; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, unsigned int wait, const u8 *buf, size_t len, @@ -2668,7 +2782,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_unlock; } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) IEEE80211_SKB_CB(skb)->hw_queue = local->hw.offchannel_tx_hw_queue; @@ -3268,4 +3383,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_et_stats = ieee80211_get_et_stats, .get_et_strings = ieee80211_get_et_strings, .get_channel = ieee80211_cfg_get_channel, + .start_radar_detection = ieee80211_start_radar_detection, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 80e55527504..78c0d90dd64 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,7 +9,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" -static void ieee80211_change_chandef(struct ieee80211_local *local, +static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { @@ -49,7 +49,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); return ctx; } @@ -91,6 +91,10 @@ ieee80211_new_chanctx(struct ieee80211_local *local, list_add_rcu(&ctx->list, &local->chanctx_list); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + return ctx; } @@ -110,6 +114,10 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, list_del_rcu(&ctx->list); kfree_rcu(ctx, rcu_head); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); } static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -128,6 +136,11 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount++; ieee80211_recalc_txpower(sdata); + sdata->vif.bss_conf.idle = false; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); return 0; } @@ -162,7 +175,7 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (WARN_ON_ONCE(!compat)) return; - ieee80211_change_chandef(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, compat); } static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -175,11 +188,18 @@ static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount--; rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + sdata->vif.bss_conf.idle = true; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + drv_unassign_vif_chanctx(local, sdata, ctx); if (ctx->refcount > 0) { ieee80211_recalc_chanctx_chantype(sdata->local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); } } @@ -198,20 +218,42 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ctx = container_of(conf, struct ieee80211_chanctx, conf); - if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan; - - /* for the VLAN list */ - ASSERT_RTNL(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, NULL); - } - ieee80211_unassign_vif_chanctx(sdata, ctx); if (ctx->refcount == 0) ieee80211_free_chanctx(local, ctx); } +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx) +{ + struct ieee80211_sub_if_data *sdata; + bool radar_enabled = false; + + lockdep_assert_held(&local->chanctx_mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->radar_required) { + radar_enabled = true; + break; + } + } + rcu_read_unlock(); + + if (radar_enabled == chanctx->conf.radar_enabled) + return; + + chanctx->conf.radar_enabled = radar_enabled; + local->radar_detect_enabled = chanctx->conf.radar_enabled; + + if (!local->use_chanctx) { + local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } + + drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); +} + void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx) { @@ -326,16 +368,57 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, goto out; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan; + ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); + out: + mutex_unlock(&local->chanctx_mtx); + return ret; +} + +int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + int ret; + + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + mutex_lock(&local->chanctx_mtx); + if (cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) { + ret = 0; + goto out; + } + + if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) { + ret = -EINVAL; + goto out; + } - /* for the VLAN list */ - ASSERT_RTNL(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, &ctx->conf); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + ret = -EINVAL; + goto out; } - ieee80211_recalc_smps_chanctx(local, ctx); + ctx = container_of(conf, struct ieee80211_chanctx, conf); + if (!cfg80211_chandef_compatible(&conf->def, chandef)) { + ret = -EINVAL; + goto out; + } + + sdata->vif.bss_conf.chandef = *chandef; + + ieee80211_recalc_chanctx_chantype(local, ctx); + + *changed |= BSS_CHANGED_BANDWIDTH; + ret = 0; out: mutex_unlock(&local->chanctx_mtx); return ret; @@ -369,6 +452,40 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_chanctx_conf *conf; + + ASSERT_RTNL(); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) + return; + + mutex_lock(&local->chanctx_mtx); + + /* + * Check that conf exists, even when clearing this function + * must be called with the AP's channel context still there + * as it would otherwise cause VLANs to have an invalid + * channel context pointer for a while, possibly pointing + * to a channel context that has already been freed. + */ + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + WARN_ON(!conf); + + if (clear) + conf = NULL; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, conf); + + mutex_unlock(&local->chanctx_mtx); +} + void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, @@ -381,7 +498,8 @@ void ieee80211_iter_chan_contexts_atomic( rcu_read_lock(); list_for_each_entry_rcu(ctx, &local->chanctx_list, list) - iter(hw, &ctx->conf, iter_data); + if (ctx->driver_present) + iter(hw, &ctx->conf, iter_data); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_atomic); diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 8f383a57601..4ccc5ed6237 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -44,6 +44,12 @@ #define MAC80211_MESH_SYNC_DEBUG 0 #endif +#ifdef CONFIG_MAC80211_MESH_PS_DEBUG +#define MAC80211_MESH_PS_DEBUG 1 +#else +#define MAC80211_MESH_PS_DEBUG 0 +#endif + #ifdef CONFIG_MAC80211_TDLS_DEBUG #define MAC80211_TDLS_DEBUG 1 #else @@ -151,6 +157,10 @@ do { \ _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + #define tdls_dbg(sdata, fmt, ...) \ _sdata_dbg(MAC80211_TDLS_DEBUG, \ sdata, fmt, ##__VA_ARGS__) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 466f4b45dd9..b0e32d62811 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -121,8 +121,8 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n"); if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n"); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) - sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_PERIOD\n"); + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC) + sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_BEFORE_ASSOC\n"); if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT) sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n"); if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) @@ -151,8 +151,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); - if (local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE) - sf += snprintf(buf + sf, mxln - sf, "SCAN_WHILE_IDLE\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbde5cc49a4..059bbb82e84 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -515,6 +515,9 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); +IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC); +IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, + u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -620,6 +623,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); + MESHPARAMS_ADD(power_mode); + MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6fb1168b9f1..c7591f73dbc 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -65,7 +65,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", TEST(AUTH), TEST(ASSOC), TEST(PS_STA), TEST(PS_DRIVER), TEST(AUTHORIZED), TEST(SHORT_PREAMBLE), @@ -74,7 +74,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT), TEST(INSERTED), TEST(RATE_CONTROL), - TEST(TOFFSET_KNOWN)); + TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER), + TEST(MPSP_RECIPIENT)); #undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 698dc7e6f30..ee56d0779d8 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -207,6 +207,17 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) + return; + + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_MONITOR)) + return; + check_sdata_in_driver(sdata); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -520,6 +531,43 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, sta, dir); } + +static inline +void drv_add_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + + if (!local->ops->add_interface_debugfs) + return; + + local->ops->add_interface_debugfs(&local->hw, &sdata->vif, + sdata->debugfs.dir); +} + +static inline +void drv_remove_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + + if (!local->ops->remove_interface_debugfs) + return; + + local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, + sdata->debugfs.dir); +} +#else +static inline +void drv_add_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) {} +static inline +void drv_remove_interface_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) {} #endif static inline __must_check @@ -561,7 +609,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, check_sdata_in_driver(sdata); WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && - sdata->vif.type != NL80211_IFTYPE_ADHOC); + (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); trace_drv_sta_rc_update(local, sdata, sta, changed); if (local->ops->sta_rc_update) @@ -837,11 +886,12 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, } static inline void drv_rssi_callback(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, const enum ieee80211_rssi_event event) { - trace_drv_rssi_callback(local, event); + trace_drv_rssi_callback(local, sdata, event); if (local->ops->rssi_callback) - local->ops->rssi_callback(&local->hw, event); + local->ops->rssi_callback(&local->hw, &sdata->vif, event); trace_drv_return_void(local); } @@ -913,6 +963,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, if (local->ops->add_chanctx) ret = local->ops->add_chanctx(&local->hw, &ctx->conf); trace_drv_return_int(local, ret); + if (!ret) + ctx->driver_present = true; return ret; } @@ -924,6 +976,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); trace_drv_return_void(local); + ctx->driver_present = false; } static inline void drv_change_chanctx(struct ieee80211_local *local, @@ -931,8 +984,10 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { trace_drv_change_chanctx(local, ctx, changed); - if (local->ops->change_chanctx) + if (local->ops->change_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->change_chanctx(&local->hw, &ctx->conf, changed); + } trace_drv_return_void(local); } @@ -945,10 +1000,12 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_assign_vif_chanctx(local, sdata, ctx); - if (local->ops->assign_vif_chanctx) + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); ret = local->ops->assign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_int(local, ret); return ret; @@ -961,10 +1018,12 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, check_sdata_in_driver(sdata); trace_drv_unassign_vif_chanctx(local, sdata, ctx); - if (local->ops->unassign_vif_chanctx) + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); local->ops->unassign_vif_chanctx(&local->hw, &sdata->vif, &ctx->conf); + } trace_drv_return_void(local); } @@ -1003,4 +1062,32 @@ static inline void drv_restart_complete(struct ieee80211_local *local) trace_drv_return_void(local); } +static inline void +drv_set_default_unicast_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx) +{ + check_sdata_in_driver(sdata); + + WARN_ON_ONCE(key_idx < -1 || key_idx > 3); + + trace_drv_set_default_unicast_key(local, sdata, key_idx); + if (local->ops->set_default_unicast_key) + local->ops->set_default_unicast_key(&local->hw, &sdata->vif, + key_idx); + trace_drv_return_void(local); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline void drv_ipv6_addr_change(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct inet6_dev *idev) +{ + trace_drv_ipv6_addr_change(local, sdata); + if (local->ops->ipv6_addr_change) + local->ops->ipv6_addr_change(&local->hw, &sdata->vif, idev); + trace_drv_return_void(local); +} +#endif + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a71d891794a..0db25d4bb22 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -37,6 +37,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); int i; + if (!ht_cap->ht_supported) + return; + if (sdata->vif.type != NL80211_IFTYPE_STATION) { /* AP interfaces call this code when adding new stations, * so just silently ignore non station interfaces. @@ -62,6 +65,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); + /* Allow user to disable SGI-20 (SGI-40 is handled above) */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); + /* Allow user to disable the max-AMSDU bit. */ __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); @@ -86,22 +92,24 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, } -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap) + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta) { + struct ieee80211_sta_ht_cap ht_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; + bool changed; + enum ieee80211_sta_rx_bandwidth bw; + enum ieee80211_smps_mode smps_mode; - BUG_ON(!ht_cap); - - memset(ht_cap, 0, sizeof(*ht_cap)); + memset(&ht_cap, 0, sizeof(ht_cap)); if (!ht_cap_ie || !sband->ht_cap.ht_supported) - return; + goto apply; - ht_cap->ht_supported = true; + ht_cap.ht_supported = true; /* * The bits listed in this expression should be @@ -109,7 +117,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * advertises more then we can't use those thus * we mask them out. */ - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & (sband->ht_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_SUP_WIDTH_20_40 | @@ -117,30 +125,31 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40)); + /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. */ if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) - ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; + ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; - ht_cap->ampdu_factor = + ht_cap.ampdu_factor = ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; - ht_cap->ampdu_density = + ht_cap.ampdu_density = (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ - ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params; + ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) - return; + goto apply; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) @@ -158,37 +167,90 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) - ht_cap->mcs.rx_mask[i] = + ht_cap.mcs.rx_mask[i] = sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) - ht_cap->mcs.rx_mask[i] = + ht_cap.mcs.rx_mask[i] = sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) - ht_cap->mcs.rx_mask[32/8] |= 1; + ht_cap.mcs.rx_mask[32/8] |= 1; + apply: /* * If user has specified capability over-rides, take care * of that here. */ - ieee80211_apply_htcap_overrides(sdata, ht_cap); + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + } + + if (bw != sta->sta.bandwidth) + changed = true; + sta->sta.bandwidth = bw; + + sta->cur_max_bandwidth = + ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + + switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) + >> IEEE80211_HT_CAP_SM_PS_SHIFT) { + case WLAN_HT_CAP_SM_PS_INVALID: + case WLAN_HT_CAP_SM_PS_STATIC: + smps_mode = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_CAP_SM_PS_DYNAMIC: + smps_mode = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_CAP_SM_PS_DISABLED: + smps_mode = IEEE80211_SMPS_OFF; + break; + } + + if (smps_mode != sta->sta.smps_mode) + changed = true; + sta->sta.smps_mode = smps_mode; + + return changed; } -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx) +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason) { int i; cancel_work_sync(&sta->ampdu_mlme.work); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx); + __ieee80211_stop_tx_ba_session(sta, i, reason); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, tx); + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } } @@ -245,8 +307,7 @@ void ieee80211_ba_session_work(struct work_struct *work) if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_INITIATOR, - true); + AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -314,8 +375,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, true); else - __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - true); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, @@ -387,6 +447,9 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) smps_mode = IEEE80211_SMPS_AUTOMATIC; + if (sdata->u.mgd.driver_smps_mode == smps_mode) + return; + sdata->u.mgd.driver_smps_mode = smps_mode; ieee80211_queue_work(&sdata->local->hw, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6b7644e818d..40b71dfcc79 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -67,7 +67,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, sdata->local->hw.extra_tx_headroom); if (!ether_addr_equal(ifibss->bssid, bssid)) - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.bss_conf.ibss_joined) { @@ -191,6 +191,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; @@ -227,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, mgmt, skb->len, 0, GFP_KERNEL); - cfg80211_put_bss(bss); + cfg80211_put_bss(local->hw.wiphy, bss); netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); } @@ -241,6 +242,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 basic_rates; int i, j; u16 beacon_int = cbss->beacon_interval; + const struct cfg80211_bss_ies *ies; + u64 tsf; lockdep_assert_held(&sdata->u.ibss.mtx); @@ -264,13 +267,17 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } } + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + tsf = ies->tsf; + rcu_read_unlock(); + __ieee80211_sta_join_ibss(sdata, cbss->bssid, beacon_int, cbss->channel, basic_rates, cbss->capability, - cbss->tsf, - false); + tsf, false); } static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, @@ -301,7 +308,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", sdata->vif.addr, addr, sdata->u.ibss.bssid); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0); + addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); } return sta; } @@ -421,15 +428,13 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, * has actually implemented this. */ ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, 0, NULL, 0, - mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0); + mgmt->sa, sdata->u.ibss.bssid, NULL, 0, 0, 0); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -491,33 +496,26 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (sta && elems->ht_operation && elems->ht_cap_elem && sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { /* we both use HT */ - struct ieee80211_sta_ht_cap sta_ht_cap_new; + struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; ieee80211_ht_oper_to_chandef(channel, elems->ht_operation, &chandef); - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta_ht_cap_new); + memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); /* * fall back to HT20 if we don't use or use * the other extension channel */ - if (chandef.width != NL80211_CHAN_WIDTH_40 || - cfg80211_get_chandef_type(&chandef) != + if (cfg80211_get_chandef_type(&chandef) != sdata->u.ibss.channel_type) - sta_ht_cap_new.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - - if (memcmp(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new))) { - memcpy(&sta->sta.ht_cap, &sta_ht_cap_new, - sizeof(sta_ht_cap_new)); - rates_updated = true; - } + htcap_ie.cap_info &= + cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap( + sdata, sband, &htcap_ie, sta); } if (sta && rates_updated) { @@ -530,14 +528,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (!bss) return; cbss = container_of((void *)bss, struct cfg80211_bss, priv); - /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = cbss->tsf; + /* same for beacon and probe response */ + beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); /* check if we need to merge IBSS */ @@ -877,14 +875,21 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static +void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems elems; + BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != + offsetof(typeof(mgmt->u.beacon), variable)); + + /* + * either beacon or probe_resp but the variable field is at the + * same offset + */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) return; @@ -892,25 +897,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); -} - -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - size_t baselen; - struct ieee802_11_elems elems; - - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -934,12 +921,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); + ieee80211_rx_mgmt_probe_beacon(sdata, mgmt, skb->len, + rx_status); break; case IEEE80211_STYPE_AUTH: ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); @@ -1117,10 +1101,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1174,7 +1154,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (cbss) { cfg80211_unlink_bss(local->hw.wiphy, cbss); - cfg80211_put_bss(cbss); + cfg80211_put_bss(local->hw.wiphy, cbss); } } @@ -1182,7 +1162,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) memset(ifibss->bssid, 0, ETH_ALEN); ifibss->ssid_len = 0; - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1205,6 +1185,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); synchronize_rcu(); @@ -1216,9 +1198,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2ed065c0956..388580a1bad 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,23 +86,11 @@ struct ieee80211_fragment_entry { struct ieee80211_bss { - /* don't want to look up all the time */ - size_t ssid_len; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - - u32 device_ts; + u32 device_ts_beacon, device_ts_presp; bool wmm_used; bool uapsd_supported; - unsigned long last_probe_resp; - -#ifdef CONFIG_MAC80211_MESH - u8 *mesh_id; - size_t mesh_id_len; - u8 *mesh_cfg; -#endif - #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; @@ -153,31 +141,6 @@ enum ieee80211_bss_valid_data_flags { IEEE80211_BSS_VALID_ERP = BIT(3) }; -static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_cfg; -#endif - return NULL; -} - -static inline u8 *bss_mesh_id(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id; -#endif - return NULL; -} - -static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss) -{ -#ifdef CONFIG_MAC80211_MESH - return bss->mesh_id_len; -#endif - return 0; -} - - typedef unsigned __bitwise__ ieee80211_tx_result; #define TX_CONTINUE ((__force ieee80211_tx_result) 0u) #define TX_DROP ((__force ieee80211_tx_result) 1u) @@ -380,6 +343,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool done; + bool timeout_started; u16 sae_trans, sae_status; size_t data_len; @@ -399,12 +363,14 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon; - bool sent_assoc; + bool have_beacon, need_beacon; bool synced; + bool timeout_started; u8 ap_ht_param; + struct ieee80211_vht_cap ap_vht_cap; + size_t ie_len; u8 ie[]; }; @@ -423,6 +389,7 @@ struct ieee80211_if_managed { unsigned long probe_timeout; int probe_send_count; bool nullfunc_failed; + bool connection_loss; struct mutex mtx; struct cfg80211_bss *associated; @@ -447,6 +414,10 @@ struct ieee80211_if_managed { bool beacon_crc_valid; u32 beacon_crc; + bool status_acked; + bool status_received; + __le16 status_fc; + enum { IEEE80211_MFP_DISABLED, IEEE80211_MFP_OPTIONAL, @@ -609,6 +580,9 @@ struct ieee80211_if_mesh { u32 mesh_seqnum; bool accepting_plinks; int num_gates; + struct beacon_data __rcu *beacon; + /* just protects beacon updates for now */ + struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -621,6 +595,11 @@ struct ieee80211_if_mesh { s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; bool adjusting_tbtt; + /* mesh power save */ + enum nl80211_mesh_power_mode nonpeer_pm; + int ps_peers_light_sleep; + int ps_peers_deep_sleep; + struct ps_data ps; }; #ifdef CONFIG_MAC80211_MESH @@ -659,10 +638,13 @@ enum ieee80211_sub_if_data_flags { * change handling while the interface is up * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel * mode, so queues are stopped + * @SDATA_STATE_OFFCHANNEL_BEACON_STOPPED: Beaconing was stopped due + * to offchannel, reset when offchannel returns */ enum ieee80211_sdata_state_bits { SDATA_STATE_RUNNING, SDATA_STATE_OFFCHANNEL, + SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, }; /** @@ -685,6 +667,7 @@ struct ieee80211_chanctx { enum ieee80211_chanctx_mode mode; int refcount; + bool driver_present; struct ieee80211_chanctx_conf conf; }; @@ -711,9 +694,6 @@ struct ieee80211_sub_if_data { char name[IFNAMSIZ]; - /* to detect idle changes */ - bool old_idle; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -741,14 +721,15 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; - bool arp_filter_state; - u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; int user_power_level; /* in dBm */ int ap_power_level; /* in dBm */ + bool radar_required; + struct delayed_work dfs_cac_timer_work; + /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for @@ -783,6 +764,11 @@ struct ieee80211_sub_if_data { struct dentry *default_mgmt_key; } debugfs; #endif + +#ifdef CONFIG_PM + struct ieee80211_bss_conf suspend_bss_conf; +#endif + /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; @@ -831,6 +817,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, }; #ifdef CONFIG_MAC80211_LEDS @@ -963,6 +950,10 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; + /* DFS/radar detection is enabled */ + bool radar_detect_enabled; + struct work_struct radar_detected_work; + /* number of RX chains the hardware has */ u8 rx_chains; @@ -977,14 +968,7 @@ struct ieee80211_local { struct sk_buff_head skb_queue; struct sk_buff_head skb_queue_unreliable; - /* - * Internal FIFO queue which is shared between multiple rx path - * stages. Its main task is to provide a serialization mechanism, - * so all rx handlers can enjoy having exclusive access to their - * private data structures. - */ - struct sk_buff_head rx_skb_queue; - bool running_rx_handler; /* protected by rx_skb_queue.lock */ + spinlock_t rx_path_lock; /* Station data */ /* @@ -1118,14 +1102,13 @@ struct ieee80211_local { struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; + struct notifier_block ifa6_notifier; /* * The dynamic ps timeout configured from user space via WEXT - * this will override whatever chosen by mac80211 internally. */ int dynamic_ps_forced_timeout; - int dynamic_ps_user_timeout; - bool disable_dynamic_ps; int user_power_level; /* in dBm, for all interfaces */ @@ -1183,40 +1166,41 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { - u8 *ie_start; + const u8 *ie_start; size_t total_len; /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *wpa; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_operation *ht_operation; - struct ieee80211_vht_cap *vht_cap_elem; - struct ieee80211_vht_operation *vht_operation; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peering; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - struct ieee80211_channel_sw_ie *ch_switch_ie; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; + const u8 *ssid; + const u8 *supp_rates; + const u8 *fh_params; + const u8 *ds_params; + const u8 *cf_params; + const struct ieee80211_tim_ie *tim; + const u8 *ibss_params; + const u8 *challenge; + const u8 *rsn; + const u8 *erp_info; + const u8 *ext_supp_rates; + const u8 *wmm_info; + const u8 *wmm_param; + const struct ieee80211_ht_cap *ht_cap_elem; + const struct ieee80211_ht_operation *ht_operation; + const struct ieee80211_vht_cap *vht_cap_elem; + const struct ieee80211_vht_operation *vht_operation; + const struct ieee80211_meshconf_ie *mesh_config; + const u8 *mesh_id; + const u8 *peering; + const __le16 *awake_window; + const u8 *preq; + const u8 *prep; + const u8 *perr; + const struct ieee80211_rann_ie *rann; + const struct ieee80211_channel_sw_ie *ch_switch_ie; + const u8 *country_elem; + const u8 *pwr_constr_elem; + const u8 *quiet_elem; /* first quite element */ + const u8 *timeout_int; + const u8 *opmode_notif; /* length of them, respectively */ u8 ssid_len; @@ -1227,7 +1211,6 @@ struct ieee802_11_elems { u8 tim_len; u8 ibss_params_len; u8 challenge_len; - u8 wpa_len; u8 rsn_len; u8 erp_info_len; u8 ext_supp_rates_len; @@ -1296,10 +1279,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp); +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); @@ -1308,6 +1291,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); @@ -1346,8 +1331,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon); + struct ieee80211_channel *channel); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); @@ -1404,10 +1388,10 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, +bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_sta_ht_cap *ht_cap); + const struct ieee80211_ht_cap *ht_cap_ie, + struct sta_info *sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); @@ -1420,7 +1404,8 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx); +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, + enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); @@ -1434,11 +1419,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator, - bool tx); + enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); void ieee80211_ba_session_work(struct work_struct *work); @@ -1448,10 +1431,17 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); /* VHT */ -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap); +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); +void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only); + /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, @@ -1569,8 +1559,9 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *bssid, - const u8 *da, const u8 *key, u8 key_len, u8 key_idx); + const u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *da, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); @@ -1587,7 +1578,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -1619,18 +1610,31 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); int __must_check ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); +int __must_check +ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); +void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx); + +void ieee80211_dfs_cac_timer(unsigned long data); +void ieee80211_dfs_cac_timer_work(struct work_struct *work); +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); +void ieee80211_dfs_radar_detected_work(struct work_struct *work); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8be854e86cd..2c059e54e88 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,8 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local, - const char *reason) +static u32 ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -99,110 +98,45 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -static u32 __ieee80211_recalc_idle(struct ieee80211_local *local) +void ieee80211_recalc_idle(struct ieee80211_local *local) { - struct ieee80211_sub_if_data *sdata; - int count = 0; - bool working = false, scanning = false; + bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; + u32 change; -#ifdef CONFIG_PROVE_LOCKING - WARN_ON(debug_locks && !lockdep_rtnl_is_held() && - !lockdep_is_held(&local->iflist_mtx)); -#endif lockdep_assert_held(&local->mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) { - sdata->vif.bss_conf.idle = true; - continue; - } - - sdata->old_idle = sdata->vif.bss_conf.idle; - - /* do not count disabled managed interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated && - !sdata->u.mgd.auth_data && - !sdata->u.mgd.assoc_data) { - sdata->vif.bss_conf.idle = true; - continue; - } - /* do not count unused IBSS interfaces */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) { - sdata->vif.bss_conf.idle = true; - continue; - } - - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) - continue; - - /* count everything else */ - sdata->vif.bss_conf.idle = false; - count++; - } + active = !list_empty(&local->chanctx_list); if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { working = true; - roc->sdata->vif.bss_conf.idle = false; + break; } } - sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); - if (sdata && !(local->hw.flags & IEEE80211_HW_SCAN_WHILE_IDLE)) { - scanning = true; - sdata->vif.bss_conf.idle = false; - } - - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) - continue; - if (sdata->old_idle == sdata->vif.bss_conf.idle) - continue; - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); - } + scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || + test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; - if (count) + if (active) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); - if (working) - return ieee80211_idle_off(local, "working"); - if (scanning) - return ieee80211_idle_off(local, "scanning"); - if (!count) - return ieee80211_idle_on(local); + if (working || scanning || active) + change = ieee80211_idle_off(local); else - return ieee80211_idle_off(local, "in use"); - - return 0; -} - -void ieee80211_recalc_idle(struct ieee80211_local *local) -{ - u32 chg; - - mutex_lock(&local->iflist_mtx); - chg = __ieee80211_recalc_idle(local); - mutex_unlock(&local->iflist_mtx); - if (chg) - ieee80211_hw_config(local, chg); + change = ieee80211_idle_on(local); + if (change) + ieee80211_hw_config(local, change); } static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) @@ -360,7 +294,8 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata) } } - if ((sdata->vif.type != NL80211_IFTYPE_AP) || + if ((sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) || !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) { sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; return 0; @@ -621,6 +556,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) goto err_del_interface; } + drv_add_interface_debugfs(local, sdata); + if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -694,10 +631,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - if (coming_up) local->open_count++; @@ -747,7 +680,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; - int i; + int i, flushed; + struct ps_data *ps; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -762,6 +696,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_roc_purge(sdata); + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_mgd_stop(sdata); + /* * Remove all stations associated with this interface. * @@ -772,11 +709,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * This is relevant only in AP, WDS and mesh modes, since in - * all other modes we've already removed all stations when - * disconnecting etc. + * This is relevant only in WDS mode, in all other modes we've + * already removed all stations when disconnecting or similar, + * so warn otherwise. + * + * We call sta_info_flush_cleanup() later, to combine RCU waits. */ - sta_info_flush(local, sdata); + flushed = sta_info_flush_defer(sdata); + WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || + (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); /* * Don't count this interface for promisc/allmulti while it @@ -813,6 +754,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + mutex_lock(&local->iflist_mtx); + ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->iflist_mtx); + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; @@ -822,8 +773,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u.vlan.list) dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); - } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { - ieee80211_mgd_stop(sdata); + } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* remove all packets in parent bc_buf pointing to this dev */ + ps = &sdata->bss->ps; + + spin_lock_irqsave(&ps->bc_buf.lock, flags); + skb_queue_walk_safe(&ps->bc_buf, skb, tmp) { + if (skb->dev == sdata->dev) { + __skb_unlink(skb, &ps->bc_buf); + local->total_ps_buffered--; + ieee80211_free_txskb(&local->hw, skb); + } + } + spin_unlock_irqrestore(&ps->bc_buf.lock, flags); } if (going_down) @@ -859,11 +821,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. + * + * sta_info_flush_cleanup() requires rcu_barrier() + * first to wait for the station call_rcu() calls + * to complete, here we need at least sychronize_rcu() + * it to wait for the RX path in case it is using the + * interface and enqueuing frames at this very time on + * another CPU. */ - synchronize_rcu(); + rcu_barrier(); + sta_info_flush_cleanup(sdata); + skb_queue_purge(&sdata->skb_queue); /* @@ -872,16 +840,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, */ ieee80211_free_keys(sdata); + drv_remove_interface_debugfs(local, sdata); + if (going_down) drv_remove_interface(local, sdata); } sdata->bss = NULL; - mutex_lock(&local->mtx); - hw_reconf_flags |= __ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { @@ -961,7 +927,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; int flushed; int i; @@ -977,7 +942,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - flushed = sta_info_flush(local, sdata); + flushed = sta_info_flush(sdata); WARN_ON(flushed); } @@ -1218,6 +1183,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; @@ -1225,9 +1191,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.p2p = true; /* fall through */ case NL80211_IFTYPE_STATION: + sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; case NL80211_IFTYPE_ADHOC: + sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); break; case NL80211_IFTYPE_MESH_POINT: @@ -1241,8 +1209,12 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: + sdata->vif.bss_conf.bssid = NULL; + break; case NL80211_IFTYPE_AP_VLAN: + break; case NL80211_IFTYPE_P2P_DEVICE: + sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1558,9 +1530,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; -#ifdef CONFIG_INET - sdata->arp_filter_state = true; -#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); @@ -1570,6 +1539,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, spin_lock_init(&sdata->cleanup_stations_lock); INIT_LIST_HEAD(&sdata->cleanup_stations); INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); + INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); for (i = 0; i < IEEE80211_NUM_BANDS; i++) { struct ieee80211_supported_band *sband; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 619c5d69799..ef252eb58c3 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -204,8 +204,11 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = key_mtx_dereference(sdata->local, sdata->keys[idx]); - if (uni) + if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + drv_set_default_unicast_key(sdata->local, sdata, idx); + } + if (multi) rcu_assign_pointer(sdata->default_multicast_key, key); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1b087fff93e..d0dd11153a6 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -23,6 +23,7 @@ #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> +#include <net/addrconf.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -33,8 +34,6 @@ #include "cfg.h" #include "debugfs.h" -static struct lock_class_key ieee80211_rx_skb_queue_class; - void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -207,76 +206,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { struct ieee80211_local *local = sdata->local; - static const u8 zero[ETH_ALEN] = { 0 }; if (!changed) return; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; - } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; - else if (sdata->vif.type == NL80211_IFTYPE_AP) - sdata->vif.bss_conf.bssid = sdata->vif.addr; - else if (sdata->vif.type == NL80211_IFTYPE_WDS) - sdata->vif.bss_conf.bssid = NULL; - else if (ieee80211_vif_is_mesh(&sdata->vif)) { - sdata->vif.bss_conf.bssid = zero; - } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { - sdata->vif.bss_conf.bssid = sdata->vif.addr; - WARN_ONCE(changed & ~(BSS_CHANGED_IDLE), - "P2P Device BSS changed %#x", changed); - } else { - WARN_ON(1); - return; - } - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: - break; - default: - /* do not warn to simplify caller in scan.c */ - changed &= ~BSS_CHANGED_BEACON_ENABLED; - if (WARN_ON(changed & BSS_CHANGED_BEACON)) - return; - break; - } - - if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { - sdata->vif.bss_conf.enable_beacon = false; - } else { - /* - * Beacon should be enabled, but AP mode must - * check whether there is a beacon configured. - */ - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ap.beacon; - break; - case NL80211_IFTYPE_ADHOC: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.ibss.presp; - break; -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = - !!sdata->u.mesh.mesh_id_len; - break; -#endif - default: - /* not reached */ - WARN_ON(1); - break; - } - } - } - drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); } @@ -415,27 +348,19 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; - while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { - bss_conf->arp_addr_list[c] = ifa->ifa_address; + while (ifa) { + if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) + bss_conf->arp_addr_list[c] = ifa->ifa_address; ifa = ifa->ifa_next; c++; } - /* If not all addresses fit the list, disable filtering */ - if (ifa) { - sdata->arp_filter_state = false; - c = 0; - } else { - sdata->arp_filter_state = true; - } bss_conf->arp_addr_cnt = c; /* Configure driver only if associated (which also implies it is up) */ - if (ifmgd->associated) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (ifmgd->associated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - } mutex_unlock(&ifmgd->mtx); @@ -443,6 +368,37 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +#if IS_ENABLED(CONFIG_IPV6) +static int ieee80211_ifa6_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)arg; + struct inet6_dev *idev = ifa->idev; + struct net_device *ndev = ifa->idev->dev; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, ifa6_notifier); + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; + + /* Make sure it's our interface that got changed */ + if (!wdev || wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + + /* + * For now only support station mode. This is mostly because + * doing AP would have to handle AP_VLAN in some way ... + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + drv_ipv6_addr_change(local, sdata, idev); + + return NOTIFY_DONE; +} +#endif + static int ieee80211_napi_poll(struct napi_struct *napi, int budget) { struct ieee80211_local *local = @@ -537,6 +493,7 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | + IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, @@ -544,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const u8 extended_capabilities[] = { + 0, 0, 0, 0, 0, 0, 0, + WLAN_EXT_CAPA8_OPMODE_NOTIF, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -600,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; + wiphy->extended_capabilities = extended_capabilities; + wiphy->extended_capabilities_mask = extended_capabilities; + wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities); + if (ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; @@ -653,25 +619,19 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); + spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); - /* - * The rx_skb_queue is only accessed from tasklets, - * but other SKB queues are used from within IRQ - * context. Therefore, this one needs a different - * locking class so our direct, non-irq-safe use of - * the queue's lock doesn't throw lockdep warnings. - */ - skb_queue_head_init_class(&local->rx_skb_queue, - &ieee80211_rx_skb_queue_class); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); INIT_WORK(&local->restart_work, ieee80211_restart_work); + INIT_WORK(&local->radar_detected_work, + ieee80211_dfs_radar_detected_work); + INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; @@ -747,9 +707,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #endif - if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) - return -EINVAL; - if (!local->use_chanctx) { for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; @@ -767,6 +724,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)) return -EINVAL; + + /* DFS currently not supported with channel context drivers */ + for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *comb; + + comb = &local->hw.wiphy->iface_combinations[i]; + + if (comb->radar_detect_widths) + return -EINVAL; + } } /* Only HW csum features are currently compatible with mac80211 */ @@ -1049,12 +1016,25 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif +#if IS_ENABLED(CONFIG_IPV6) + local->ifa6_notifier.notifier_call = ieee80211_ifa6_changed; + result = register_inet6addr_notifier(&local->ifa6_notifier); + if (result) + goto fail_ifa6; +#endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, local->hw.napi_weight); return 0; +#if IS_ENABLED(CONFIG_IPV6) + fail_ifa6: #ifdef CONFIG_INET + unregister_inetaddr_notifier(&local->ifa_notifier); +#endif +#endif +#if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); @@ -1090,6 +1070,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&local->ifa6_notifier); +#endif rtnl_lock(); @@ -1113,7 +1096,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); - skb_queue_purge(&local->rx_skb_queue); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); @@ -1191,8 +1173,7 @@ static void __exit ieee80211_exit(void) rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); - if (mesh_allocated) - ieee80211s_stop(); + ieee80211s_stop(); ieee80211_iface_exit(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 649ad513547..29ce2aa87e7 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -17,19 +17,14 @@ #define TMR_RUNNING_MP 1 #define TMR_RUNNING_MPR 2 -int mesh_allocated; +static int mesh_allocated; static struct kmem_cache *rm_cache; -#ifdef CONFIG_MAC80211_MESH bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { return (mgmt->u.action.u.mesh_action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION); } -#else -bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) -{ return false; } -#endif void ieee80211s_init(void) { @@ -41,6 +36,8 @@ void ieee80211s_init(void) void ieee80211s_stop(void) { + if (!mesh_allocated) + return; mesh_pathtbl_unregister(); kmem_cache_destroy(rm_cache); } @@ -95,24 +92,22 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) - goto mismatch; + return false; ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata), &basic_rates); if (sdata->vif.bss_conf.basic_rates != basic_rates) - goto mismatch; + return false; ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, ie->ht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, &sta_chan_def)) - goto mismatch; + return false; return true; -mismatch: - return false; } /** @@ -123,7 +118,7 @@ mismatch: bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; + IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -154,6 +149,31 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) return changed; } +/* + * mesh_sta_cleanup - clean up any mesh sta state + * + * @sta: mesh sta to clean up. + */ +void mesh_sta_cleanup(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; + + /* + * maybe userspace handles peer allocation and peering, but in either + * case the beacon is still generated by the kernel and we might need + * an update. + */ + changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + changed |= mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } + + if (changed) + ieee80211_mbss_info_change_notify(sdata, changed); +} + int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { int i; @@ -176,11 +196,12 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) if (!sdata->u.mesh.rmc) return; - for (i = 0; i < RMC_BUCKETS; i++) + for (i = 0; i < RMC_BUCKETS; i++) { list_for_each_entry_safe(p, n, &rmc->bucket[i], list) { list_del(&p->list); kmem_cache_free(rm_cache, p); } + } kfree(rmc); sdata->u.mesh.rmc = NULL; @@ -189,6 +210,7 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) /** * mesh_rmc_check - Check frame in recent multicast cache and add if absent. * + * @sdata: interface * @sa: source address * @mesh_hdr: mesh_header * @@ -198,8 +220,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) * received this frame lately. If the frame is not in the cache, it is added to * it. */ -int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata) +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *sa, struct ieee80211s_hdr *mesh_hdr) { struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; @@ -213,12 +235,11 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, list_for_each_entry_safe(p, n, &rmc->bucket[idx], list) { ++entries; if (time_after(jiffies, p->exp_time) || - (entries == RMC_QUEUE_MAX_LEN)) { + entries == RMC_QUEUE_MAX_LEN) { list_del(&p->list); kmem_cache_free(rm_cache, p); --entries; - } else if ((seqnum == p->seqnum) && - (ether_addr_equal(sa, p->sa))) + } else if ((seqnum == p->seqnum) && ether_addr_equal(sa, p->sa)) return -1; } @@ -233,8 +254,8 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -int -mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos, neighbors; @@ -265,16 +286,18 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? - IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ + *pos |= ifmsh->ps_peers_deep_sleep ? + IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; *pos++ |= ifmsh->adjusting_tbtt ? - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; *pos++ = 0x00; return 0; } -int -mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; @@ -291,8 +314,31 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +static int mesh_add_awake_window_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos; + + /* see IEEE802.11-2012 13.14.6 */ + if (ifmsh->ps_peers_light_sleep == 0 && + ifmsh->ps_peers_deep_sleep == 0 && + ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE) + return 0; + + if (skb_tailroom(skb) < 4) + return -ENOMEM; + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_MESH_AWAKE_WINDOW; + *pos++ = 2; + put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos); + + return 0; +} + +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 offset, len; @@ -315,8 +361,7 @@ mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int -mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 len = 0; @@ -344,11 +389,9 @@ mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; u8 *pos; @@ -365,19 +408,16 @@ int mesh_add_ds_params_ie(struct sk_buff *skb, chan = chanctx_conf->def.chan; rcu_read_unlock(); - sband = local->hw.wiphy->bands[chan->band]; - if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(chan->center_freq); - } + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(chan->center_freq); return 0; } -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); @@ -398,8 +438,8 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb, return 0; } -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; @@ -434,6 +474,7 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb, return 0; } + static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -479,7 +520,7 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame - * @hdr: 802.11 frame header + * @hdr: 802.11 frame header * @fc: frame control field * @meshda: destination address in the mesh * @meshsa: source address address in the mesh. Same as TA, as frame is @@ -510,8 +551,8 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, /** * ieee80211_new_mesh_header - create a new mesh header - * @meshhdr: uninitialized mesh header * @sdata: mesh interface to be used + * @meshhdr: uninitialized mesh header * @addr4or5: 1st address in the ae header, which may correspond to address 4 * (if addr6 is NULL) or address 5 (if addr6 is present). It may * be NULL. @@ -520,42 +561,49 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * * Return the header length. */ -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6) +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6) { - int aelen = 0; - BUG_ON(!addr4or5 && addr6); + if (WARN_ON(!addr4or5 && addr6)) + return 0; + memset(meshhdr, 0, sizeof(*meshhdr)); + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + + /* FIXME: racy -- TX on multiple queues can be concurrent */ put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); sdata->u.mesh.mesh_seqnum++; + if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; - aelen += ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); + return 2 * ETH_ALEN; } else if (addr4or5 && addr6) { meshhdr->flags |= MESH_FLAGS_AE_A5_A6; - aelen += 2 * ETH_ALEN; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); memcpy(meshhdr->eaddr2, addr6, ETH_ALEN); + return 3 * ETH_ALEN; } - return 6 + aelen; + + return ETH_ALEN; } -static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_mesh *ifmsh) +static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, - round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); + round_jiffies(jiffies + + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); } static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) @@ -603,10 +651,149 @@ void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) } #endif -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) +{ + struct beacon_data *bcn; + int head_len, tail_len; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + struct ieee80211_chanctx_conf *chanctx_conf; + enum ieee80211_band band; + u8 *pos; + struct ieee80211_sub_if_data *sdata; + int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + + sizeof(mgmt->u.beacon); + + sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + head_len = hdr_len + + 2 + /* NULL SSID */ + 2 + 8 + /* supported rates */ + 2 + 3; /* DS params */ + tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + ifmsh->mesh_id_len + + 2 + sizeof(struct ieee80211_meshconf_ie) + + 2 + sizeof(__le16) + /* awake window */ + ifmsh->ie_len; + + bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); + /* need an skb for IE builders to operate on */ + skb = dev_alloc_skb(max(head_len, tail_len)); + + if (!bcn || !skb) + goto out_free; + + /* + * pointers go into the block we allocated, + * memory is | beacon_data | head | tail | + */ + bcn->head = ((u8 *) bcn) + sizeof(*bcn); + + /* fill in the head */ + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_BEACON); + eth_broadcast_addr(mgmt->da); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt); + mgmt->u.beacon.beacon_int = + cpu_to_le16(sdata->vif.bss_conf.beacon_int); + mgmt->u.beacon.capab_info |= cpu_to_le16( + sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); + + pos = skb_put(skb, 2); + *pos++ = WLAN_EID_SSID; + *pos++ = 0x0; + + if (ieee80211_add_srates_ie(sdata, skb, true, band) || + mesh_add_ds_params_ie(sdata, skb)) + goto out_free; + + bcn->head_len = skb->len; + memcpy(bcn->head, skb->data, bcn->head_len); + + /* now the tail */ + skb_trim(skb, 0); + bcn->tail = bcn->head + bcn->head_len; + + if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) || + mesh_add_rsn_ie(sdata, skb) || + mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb) || + mesh_add_awake_window_ie(sdata, skb) || + mesh_add_vendor_ies(sdata, skb)) + goto out_free; + + bcn->tail_len = skb->len; + memcpy(bcn->tail, skb->data, bcn->tail_len); + + dev_kfree_skb(skb); + rcu_assign_pointer(ifmsh->beacon, bcn); + return 0; +out_free: + kfree(bcn); + dev_kfree_skb(skb); + return -ENOMEM; +} + +static int +ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +{ + struct ieee80211_sub_if_data *sdata; + struct beacon_data *old_bcn; + int ret; + sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + + mutex_lock(&ifmsh->mtx); + + old_bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + ret = ieee80211_mesh_build_beacon(ifmsh); + if (ret) + /* just reuse old beacon */ + goto out; + + if (old_bcn) + kfree_rcu(old_bcn, rcu_head); +out: + mutex_unlock(&ifmsh->mtx); + return ret; +} + +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed) +{ + if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + return; + ieee80211_bss_info_change_notify(sdata, changed); +} + +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; + u32 changed = BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -624,34 +811,51 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; - sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; + sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(sdata->local, - ieee80211_get_sdata_band(sdata)); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT); + ieee80211_mandatory_rates(local, band); + + changed |= ieee80211_mps_local_status_update(sdata); + + if (ieee80211_mesh_build_beacon(ifmsh)) { + ieee80211_stop_mesh(sdata); + return -ENOMEM; + } + + ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); + return 0; } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct beacon_data *bcn; netif_carrier_off(sdata->dev); /* stop the beacon */ ifmsh->mesh_id_len = 0; + sdata->vif.bss_conf.enable_beacon = false; + clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + mutex_lock(&ifmsh->mtx); + bcn = rcu_dereference_protected(ifmsh->beacon, + lockdep_is_held(&ifmsh->mtx)); + rcu_assign_pointer(ifmsh->beacon, NULL); + kfree_rcu(bcn, rcu_head); + mutex_unlock(&ifmsh->mtx); /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata->local, sdata); + sta_info_flush(sdata); mesh_path_flush_by_iface(sdata); + /* free all potentially still buffered group-addressed frames */ + local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); + skb_queue_purge(&ifmsh->ps.bc_buf); + del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); @@ -671,6 +875,62 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->u.mesh.timers_running = 0; } +static void +ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sk_buff *presp; + struct beacon_data *bcn; + struct ieee80211_mgmt *hdr; + struct ieee802_11_elems elems; + size_t baselen; + u8 *pos, *end; + + end = ((u8 *) mgmt) + len; + pos = mgmt->u.probe_req.variable; + baselen = (u8 *) pos - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(pos, len - baselen, &elems); + + /* 802.11-2012 10.1.4.3.2 */ + if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && + !is_broadcast_ether_addr(mgmt->da)) || + elems.ssid_len != 0) + return; + + if (elems.mesh_id_len != 0 && + (elems.mesh_id_len != ifmsh->mesh_id_len || + memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len))) + return; + + rcu_read_lock(); + bcn = rcu_dereference(ifmsh->beacon); + + if (!bcn) + goto out; + + presp = dev_alloc_skb(local->tx_headroom + + bcn->head_len + bcn->tail_len); + if (!presp) + goto out; + + skb_reserve(presp, local->tx_headroom); + memcpy(skb_put(presp, bcn->head_len), bcn->head, bcn->head_len); + memcpy(skb_put(presp, bcn->tail_len), bcn->tail, bcn->tail_len); + hdr = (struct ieee80211_mgmt *) presp->data; + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); + memcpy(hdr->da, mgmt->sa, ETH_ALEN); + IEEE80211_SKB_CB(presp)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, presp); +out: + rcu_read_unlock(); +} + static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, @@ -760,6 +1020,9 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, rx_status); break; + case IEEE80211_STYPE_PROBE_REQ: + ieee80211_mesh_rx_probe_req(sdata, mgmt, skb->len); + break; case IEEE80211_STYPE_ACTION: ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; @@ -782,7 +1045,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) - ieee80211_mesh_housekeeping(sdata, ifmsh); + ieee80211_mesh_housekeeping(sdata); if (test_and_clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags)) ieee80211_mesh_rootpath(sdata); @@ -805,6 +1068,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + static u8 zero_addr[ETH_ALEN] = {}; setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, @@ -828,6 +1092,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_path_root_timer, (unsigned long) sdata); INIT_LIST_HEAD(&ifmsh->preq_queue.list); + skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); + RCU_INIT_POINTER(ifmsh->beacon, NULL); + mutex_init(&ifmsh->mtx); + + sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 84c28c6101c..336c88a1668 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -26,12 +26,12 @@ * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence - * number + * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be - * modified + * modified * @MESH_PATH_RESOLVED: the mesh path can has been resolved * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination - * already queued up, waiting for the discovery process to start. + * already queued up, waiting for the discovery process to start. * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. @@ -73,16 +73,16 @@ enum mesh_deferred_task_flags { * @dst: mesh path destination mac address * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be - * forwarded + * forwarded * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the - * path is unresolved + * path is unresolved * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination * @exp_time: in jiffies, when the path will expire or when it expired * @discovery_timeout: timeout (lapse in jiffies) used for the last discovery - * retry + * retry * @discovery_retries: number of discovery retries * @flags: mesh path flags, as specified on &enum mesh_path_flags * @state_lock: mesh path state lock used to protect changes to the @@ -191,8 +191,6 @@ struct mesh_rmc { #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ - #define MESH_PATH_EXPIRE (600 * HZ) /* Default maximum number of plinks per interface */ @@ -208,95 +206,113 @@ struct mesh_rmc { /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *da, const u8 *sa); -int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, - struct ieee80211_sub_if_data *sdata, char *addr4or5, - char *addr6); -int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, - struct ieee80211_sub_if_data *sdata); +int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6); +int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, + const u8 *addr, struct ieee80211s_hdr *mesh_hdr); bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie); void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); -void mesh_mgmt_ies_add(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshconf_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_meshid_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_rsn_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_vendor_ies(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ds_params_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_cap_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_add_ht_oper_ie(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +void mesh_mgmt_ies_add(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); -void ieee80211s_stop(void); + struct sta_info *sta, struct sk_buff *skb); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); +int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method); +/* wrapper for ieee80211_bss_info_change_notify() */ +void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed); + +/* mesh power save */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm); +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mps_sta_status_update(struct sta_info *sta); +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked); +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems); /* Mesh paths */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -struct mesh_path *mpp_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); -int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup_by_idx(int idx, - struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +struct mesh_path *mpp_path_lookup(struct ieee80211_sub_if_data *sdata, + const u8 *dst); +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp); +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); + struct ieee80211_mgmt *mgmt, size_t len); +int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); int mesh_gate_num(struct ieee80211_sub_if_data *sdata); + /* Mesh plinks */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr, - struct ieee802_11_elems *ie); + u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); -void mesh_plink_deactivate(struct sta_info *sta); -int mesh_plink_open(struct sta_info *sta); -void mesh_plink_block(struct sta_info *sta); +u32 mesh_plink_deactivate(struct sta_info *sta); +u32 mesh_plink_open(struct sta_info *sta); +u32 mesh_plink_block(struct sta_info *sta); void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status); +void mesh_sta_cleanup(struct sta_info *sta); /* Private interfaces */ /* Mesh tables */ void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, - const u8 *ra, struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); int mesh_pathtbl_init(void); void mesh_pathtbl_unregister(void); -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata); +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); @@ -305,7 +321,19 @@ bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH -extern int mesh_allocated; +static inline +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_inc(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} + +static inline +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +{ + atomic_dec(&sdata->u.mesh.estab_plinks); + return mesh_accept_plinks_update(sdata); +} static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { @@ -337,8 +365,8 @@ void mesh_plink_quiesce(struct sta_info *sta); void mesh_plink_restart(struct sta_info *sta); void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); +void ieee80211s_stop(void); #else -#define mesh_allocated 0 static inline void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) @@ -351,6 +379,7 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) {} +static inline void ieee80211s_stop(void) {} #endif #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 2659e428b80..bdb8d3b1458 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -30,14 +30,14 @@ static void mesh_queue_preq(struct mesh_path *, u8); -static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -102,10 +102,13 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, - __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, __le32 preq_id, - struct ieee80211_sub_if_data *sdata) + const u8 *orig_addr, __le32 orig_sn, + u8 target_flags, const u8 *target, + __le32 target_sn, const u8 *da, + u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, + __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -205,6 +208,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); @@ -217,23 +221,26 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, info->control.vif = &sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); } /** - * mesh_send_path error - Sends a PERR mesh management frame + * mesh_path_error_tx - Sends a PERR mesh management frame * + * @ttl: allowed remaining hops * @target: broken destination * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * @sdata: local mesh subif * * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra, - struct ieee80211_sub_if_data *sdata) +int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, + u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -353,6 +360,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) + * @action: type of hwmp ie * * This function updates the path routing information to the originator and the * transmitter of a HWMP PREQ or PREP frame. @@ -364,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * path routing information is updated. */ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie, enum mpath_frame_type action) + struct ieee80211_mgmt *mgmt, + const u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; - u8 *orig_addr, *ta; + const u8 *orig_addr, *ta; u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; @@ -422,7 +430,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, process = false; fresh_info = false; } else { - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_FIXED) @@ -437,8 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } } } else { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); + mesh_path_add(sdata, orig_addr); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { rcu_read_unlock(); return 0; @@ -470,7 +478,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, else { fresh_info = true; - mpath = mesh_path_lookup(ta, sdata); + mpath = mesh_path_lookup(sdata, ta); if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || @@ -478,8 +486,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, (last_hop_metric > mpath->metric))) fresh_info = false; } else { - mesh_path_add(ta, sdata); - mpath = mesh_path_lookup(ta, sdata); + mesh_path_add(sdata, ta); + mpath = mesh_path_lookup(sdata, ta); if (!mpath) { rcu_read_unlock(); return 0; @@ -506,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *preq_elem, u32 metric) + const u8 *preq_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath = NULL; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; u32 orig_sn, target_sn, lifetime, orig_metric; @@ -545,7 +553,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) { if (flags & IEEE80211_PREQ_PROACTIVE_PREP_FLAG) { reply = true; @@ -560,7 +568,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } else { rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { if ((!(mpath->flags & MESH_PATH_SN_VALID)) || SN_LT(mpath->sn, target_sn)) { @@ -643,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath) static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *prep_elem, u32 metric) + const u8 *prep_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; @@ -670,7 +678,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -706,12 +714,13 @@ fail: } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, u8 *perr_elem) + struct ieee80211_mgmt *mgmt, + const u8 *perr_elem) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *target_addr; + const u8 *ta, *target_addr; u32 target_sn; u16 target_rcode; @@ -727,7 +736,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, target_rcode = PERR_IE_TARGET_RCODE(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { struct sta_info *sta; @@ -742,9 +751,10 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&mpath->state_lock); if (!ifmsh->mshcfg.dot11MeshForwarding) goto endperr; - mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn), + mesh_path_error_tx(sdata, ttl, target_addr, + cpu_to_le32(target_sn), cpu_to_le16(target_rcode), - broadcast_addr, sdata); + broadcast_addr); } else spin_unlock_bh(&mpath->state_lock); } @@ -753,15 +763,15 @@ endperr: } static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - struct ieee80211_rann_ie *rann) + struct ieee80211_mgmt *mgmt, + const struct ieee80211_rann_ie *rann) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct mesh_path *mpath; u8 ttl, flags, hopcount; - u8 *orig_addr; + const u8 *orig_addr; u32 orig_sn, metric, metric_txsta, interval; bool root_is_gate; @@ -792,10 +802,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, metric_txsta = airtime_link_metric_get(local, sta); - mpath = mesh_path_lookup(orig_addr, sdata); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { - mesh_path_add(orig_addr, sdata); - mpath = mesh_path_lookup(orig_addr, sdata); + mesh_path_add(sdata, orig_addr); + mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { rcu_read_unlock(); sdata->u.mesh.mshstats.dropped_frames_no_route++; @@ -852,8 +862,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee802_11_elems elems; size_t baselen; @@ -997,7 +1006,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); rcu_read_lock(); - mpath = mesh_path_lookup(preq_node->dst, sdata); + mpath = mesh_path_lookup(sdata, preq_node->dst); if (!mpath) goto enddiscovery; @@ -1067,8 +1076,8 @@ enddiscovery: * Returns: 0 if the next hop was found and -ENOENT if the frame was queued. * skb is freeed here if no mpath could be allocated. */ -int mesh_nexthop_resolve(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1077,18 +1086,22 @@ int mesh_nexthop_resolve(struct sk_buff *skb, u8 *target_addr = hdr->addr3; int err = 0; + /* Nulls are only sent to peers for PS and should be pre-addressed */ + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return 0; + rcu_read_lock(); - err = mesh_nexthop_lookup(skb, sdata); + err = mesh_nexthop_lookup(sdata, skb); if (!err) goto endlookup; /* no nexthop found, start resolving */ - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath) { - mesh_path_add(target_addr, sdata); - mpath = mesh_path_lookup(target_addr, sdata); + mesh_path_add(sdata, target_addr); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath) { - mesh_path_discard_frame(skb, sdata); + mesh_path_discard_frame(sdata, skb); err = -ENOSPC; goto endlookup; } @@ -1105,12 +1118,13 @@ int mesh_nexthop_resolve(struct sk_buff *skb, skb_queue_tail(&mpath->frame_queue, skb); err = -ENOENT; if (skb_to_free) - mesh_path_discard_frame(skb_to_free, sdata); + mesh_path_discard_frame(sdata, skb_to_free); endlookup: rcu_read_unlock(); return err; } + /** * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. Calling * this function is considered "using" the associated mpath, so preempt a path @@ -1121,8 +1135,8 @@ endlookup: * * Returns: 0 if the next hop was found. Nonzero otherwise. */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct mesh_path *mpath; struct sta_info *next_hop; @@ -1131,7 +1145,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, int err = -ENOENT; rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) goto endlookup; @@ -1148,6 +1162,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, if (next_hop) { memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); err = 0; } @@ -1189,8 +1204,7 @@ void mesh_path_timer(unsigned long data) } } -void -mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) +void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index aa749818860..6b3c4e119c6 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -24,9 +24,12 @@ /* Keep the mean chain length below this constant */ #define MEAN_CHAIN_LEN 2 -#define MPATH_EXPIRED(mpath) ((mpath->flags & MESH_PATH_ACTIVE) && \ - time_after(jiffies, mpath->exp_time) && \ - !(mpath->flags & MESH_PATH_FIXED)) +static inline bool mpath_expired(struct mesh_path *mpath) +{ + return (mpath->flags & MESH_PATH_ACTIVE) && + time_after(jiffies, mpath->exp_time) && + !(mpath->flags & MESH_PATH_FIXED); +} struct mpath_node { struct hlist_node list; @@ -181,12 +184,12 @@ errcopy: return -ENOMEM; } -static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, +static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd) - & tbl->hash_mask; + return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, + tbl->hash_rnd) & tbl->hash_mask; } @@ -212,6 +215,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); @@ -325,8 +329,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, } -static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, - struct ieee80211_sub_if_data *sdata) +static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; @@ -338,7 +342,7 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) { - if (MPATH_EXPIRED(mpath)) { + if (mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); @@ -351,19 +355,21 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, /** * mesh_path_lookup - look up a path in the mesh path table - * @dst: hardware address (ETH_ALEN length) of destination * @sdata: local subif + * @dst: hardware address (ETH_ALEN length) of destination * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); } -struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(rcu_dereference(mpp_paths), dst, sdata); } @@ -378,7 +384,8 @@ struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata) +struct mesh_path * +mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct mesh_table *tbl = rcu_dereference(mesh_paths); struct mpath_node *node; @@ -390,7 +397,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { - if (MPATH_EXPIRED(node->mpath)) { + if (mpath_expired(node->mpath)) { spin_lock_bh(&node->mpath->state_lock); node->mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&node->mpath->state_lock); @@ -434,11 +441,10 @@ int mesh_path_add_gate(struct mesh_path *mpath) spin_lock_bh(&tbl->gates_lock); hlist_add_head_rcu(&new_gate->list, tbl->known_gates); spin_unlock_bh(&tbl->gates_lock); - rcu_read_unlock(); mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); - return 0; + err = 0; err_rcu: rcu_read_unlock(); return err; @@ -449,30 +455,27 @@ err_rcu: * @tbl: table which holds our list of known gates * @mpath: gate mpath * - * Returns: 0 on success - * * Locking: must be called inside rcu_read_lock() section */ -static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) +static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { struct mpath_node *gate; struct hlist_node *p, *q; - hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) - if (gate->mpath == mpath) { - spin_lock_bh(&tbl->gates_lock); - hlist_del_rcu(&gate->list); - kfree_rcu(gate, rcu); - spin_unlock_bh(&tbl->gates_lock); - mpath->sdata->u.mesh.num_gates--; - mpath->is_gate = false; - mpath_dbg(mpath->sdata, - "Mesh path: Deleted gate: %pM. %d known gates\n", - mpath->dst, mpath->sdata->u.mesh.num_gates); - break; - } - - return 0; + hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) { + if (gate->mpath != mpath) + continue; + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&gate->list); + kfree_rcu(gate, rcu); + spin_unlock_bh(&tbl->gates_lock); + mpath->sdata->u.mesh.num_gates--; + mpath->is_gate = false; + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. %d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); + break; + } } /** @@ -486,14 +489,14 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) /** * mesh_path_add - allocate and add a new path to the mesh path table - * @addr: destination address of the path (ETH_ALEN length) + * @dst: destination address of the path (ETH_ALEN length) * @sdata: local subif * * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) +int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -628,7 +631,8 @@ void mesh_mpp_table_grow(void) write_unlock_bh(&pathtbl_resize_lock); } -int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) +int mpp_path_add(struct ieee80211_sub_if_data *sdata, + const u8 *dst, const u8 *mpp) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; @@ -737,9 +741,10 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, - mpath->dst, cpu_to_le32(mpath->sn), - reason, bcast, sdata); + mesh_path_error_tx(sdata, + sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, cpu_to_le32(mpath->sn), + reason, bcast); } } rcu_read_unlock(); @@ -854,7 +859,7 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) * * Returns: 0 if successful */ -int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) +int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_table *tbl; struct mesh_path *mpath; @@ -963,8 +968,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) * * Locking: the function must me called within a rcu_read_lock region */ -void mesh_path_discard_frame(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { kfree_skb(skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; @@ -982,7 +987,7 @@ void mesh_path_flush_pending(struct mesh_path *mpath) struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) - mesh_path_discard_frame(skb, mpath->sdata); + mesh_path_discard_frame(mpath->sdata, skb); } /** @@ -1103,7 +1108,7 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) - mesh_path_del(mpath->dst, mpath->sdata); + mesh_path_del(mpath->sdata, mpath->dst); } rcu_read_unlock(); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4b274e9c91a..07d396d5707 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -37,23 +37,31 @@ enum plink_event { CLS_IGNR }; -static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason); +static const char * const mplstates[] = { + [NL80211_PLINK_LISTEN] = "LISTEN", + [NL80211_PLINK_OPN_SNT] = "OPN-SNT", + [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", + [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", + [NL80211_PLINK_ESTAB] = "ESTAB", + [NL80211_PLINK_HOLDING] = "HOLDING", + [NL80211_PLINK_BLOCKED] = "BLOCKED" +}; -static inline -u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static const char * const mplevents[] = { + [PLINK_UNDEFINED] = "NONE", + [OPN_ACPT] = "OPN_ACPT", + [OPN_RJCT] = "OPN_RJCT", + [OPN_IGNR] = "OPN_IGNR", + [CNF_ACPT] = "CNF_ACPT", + [CNF_RJCT] = "CNF_RJCT", + [CNF_IGNR] = "CNF_IGNR", + [CLS_ACPT] = "CLS_ACPT", + [CLS_IGNR] = "CLS_IGNR" +}; -static inline -u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) -{ - atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); -} +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason); /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine @@ -70,27 +78,63 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) } /* - * Allocate mesh sta entry and insert into station table + * mesh_set_short_slot_time - enable / disable ERP short slot time. + * + * The standard indirectly mandates mesh STAs to turn off short slot time by + * disallowing advertising this (802.11-2012 8.4.1.4), but that doesn't mean we + * can't be sneaky about it. Enable short slot time if all mesh STAs in the + * MBSS support ERP rates. + * + * Returns BSS_CHANGED_ERP_SLOT or 0 for no change. */ -static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr) +static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; struct sta_info *sta; + u32 erp_rates = 0, changed = 0; + int i; + bool short_slot = false; - if (sdata->local->num_sta >= MESH_MAX_PLINKS) - return NULL; + if (band == IEEE80211_BAND_5GHZ) { + /* (IEEE 802.11-2012 19.4.5) */ + short_slot = true; + goto out; + } else if (band != IEEE80211_BAND_2GHZ || + (band == IEEE80211_BAND_2GHZ && + local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + goto out; - sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); - if (!sta) - return NULL; + for (i = 0; i < sband->n_bitrates; i++) + if (sband->bitrates[i].flags & IEEE80211_RATE_ERP_G) + erp_rates |= BIT(i); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + if (!erp_rates) + goto out; - set_sta_flag(sta, WLAN_STA_WME); + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + sta->plink_state != NL80211_PLINK_ESTAB) + continue; - return sta; + short_slot = false; + if (erp_rates & sta->sta.supp_rates[band]) + short_slot = true; + else + break; + } + rcu_read_unlock(); + +out: + if (sdata->vif.bss_conf.use_short_slot != short_slot) { + sdata->vif.bss_conf.use_short_slot = short_slot; + changed = BSS_CHANGED_ERP_SLOT; + mpl_dbg(sdata, "mesh_plink %pM: ERP short slot time %d\n", + sdata->vif.addr, short_slot); + } + return changed; } /** @@ -107,7 +151,6 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u32 changed = 0; u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; @@ -120,23 +163,19 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) sta->plink_state != NL80211_PLINK_ESTAB) continue; - switch (sta->ch_width) { - case NL80211_CHAN_WIDTH_20_NOHT: - mpl_dbg(sdata, - "mesh_plink %pM: nonHT sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); + if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20) + continue; + + if (!sta->sta.ht_cap.ht_supported) { + mpl_dbg(sdata, "nonHT sta (%pM) is present\n", + sta->sta.addr); non_ht_sta = true; - goto out; - case NL80211_CHAN_WIDTH_20: - mpl_dbg(sdata, - "mesh_plink %pM: HT20 sta (%pM) is present\n", - sdata->vif.addr, sta->sta.addr); - ht20_sta = true; - default: break; } + + mpl_dbg(sdata, "HT20 sta (%pM) is present\n", sta->sta.addr); + ht20_sta = true; } -out: rcu_read_unlock(); if (non_ht_sta) @@ -147,16 +186,13 @@ out: else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; - if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { - sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; - changed = BSS_CHANGED_HT; - mpl_dbg(sdata, - "mesh_plink %pM: protection mode changed to %d\n", - sdata->vif.addr, ht_opmode); - } + if (sdata->vif.bss_conf.ht_operation_mode == ht_opmode) + return 0; - return changed; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; + sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; + mpl_dbg(sdata, "selected new HT protection mode %d\n", ht_opmode); + return BSS_CHANGED_HT; } /** @@ -179,6 +215,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_local_status_update(sdata); + return changed; } @@ -189,7 +228,7 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) * * All mesh paths with this peer as next hop will be flushed */ -void mesh_plink_deactivate(struct sta_info *sta) +u32 mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; @@ -202,12 +241,13 @@ void mesh_plink_deactivate(struct sta_info *sta) sta->reason); spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason) { + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason) +{ struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_tx_info *info; @@ -258,13 +298,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata)) + mesh_add_rsn_ie(sdata, skb) || + mesh_add_meshid_ie(sdata, skb) || + mesh_add_meshconf_ie(sdata, skb)) goto free; } else { /* WLAN_SP_MESH_PEERING_CLOSE */ info->flags |= IEEE80211_TX_CTL_NO_ACK; - if (mesh_add_meshid_ie(skb, sdata)) + if (mesh_add_meshid_ie(sdata, skb)) goto free; } @@ -308,12 +348,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, } if (action != WLAN_SP_MESH_PEERING_CLOSE) { - if (mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata)) + if (mesh_add_ht_cap_ie(sdata, skb) || + mesh_add_ht_oper_ie(sdata, skb)) goto free; } - if (mesh_add_vendor_ies(skb, sdata)) + if (mesh_add_vendor_ies(sdata, skb)) goto free; ieee80211_tx_skb(sdata, skb); @@ -323,92 +363,147 @@ free: return err; } -/** - * mesh_peer_init - initialize new mesh peer and return resulting sta_info - * - * @sdata: local meshif - * @addr: peer's address - * @elems: IEs from beacon or mesh peering frame - * - * call under RCU - */ -static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata, - u8 *addr, - struct ieee802_11_elems *elems) +static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; - u32 rates, basic_rates = 0; - struct sta_info *sta; - bool insert = false; + u32 rates, basic_rates = 0, changed = 0; sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates); - sta = sta_info_get(sdata, addr); - if (!sta) { - /* Userspace handles peer allocation when security is enabled */ - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) { - cfg80211_notify_new_peer_candidate(sdata->dev, addr, - elems->ie_start, - elems->total_len, - GFP_ATOMIC); - return NULL; - } - - sta = mesh_plink_alloc(sdata, addr); - if (!sta) - return NULL; - insert = true; - } - spin_lock_bh(&sta->lock); sta->last_rx = jiffies; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - spin_unlock_bh(&sta->lock); - return sta; - } + /* rates and capabilities don't change during peering */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + goto out; + + if (sta->sta.supp_rates[band] != rates) + changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.supp_rates[band] = rates; - if (elems->ht_cap_elem && - sdata->vif.bss_conf.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems->ht_cap_elem, - &sta->sta.ht_cap); - else - memset(&sta->sta.ht_cap, 0, sizeof(sta->sta.ht_cap)); - - if (elems->ht_operation) { - struct cfg80211_chan_def chandef; - - if (!(elems->ht_operation->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - sta->sta.ht_cap.cap &= - ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, - elems->ht_operation, &chandef); - sta->ch_width = chandef.width; + + if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + elems->ht_cap_elem, sta)) + changed |= IEEE80211_RC_BW_CHANGED; + + /* HT peer is operating 20MHz-only */ + if (elems->ht_operation && + !(elems->ht_operation->ht_param & + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { + if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20) + changed |= IEEE80211_RC_BW_CHANGED; + sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; } if (insert) rate_control_rate_init(sta); + else + rate_control_rate_update(local, sband, sta, changed); +out: spin_unlock_bh(&sta->lock); +} + +static struct sta_info * +__mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) +{ + struct sta_info *sta; - if (insert && sta_info_insert(sta)) + if (sdata->local->num_sta >= MESH_MAX_PLINKS) + return NULL; + + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); + if (!sta) return NULL; + sta->plink_state = NL80211_PLINK_LISTEN; + init_timer(&sta->plink_timer); + + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); + + set_sta_flag(sta, WLAN_STA_WME); + + return sta; +} + +static struct sta_info * +mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, + struct ieee802_11_elems *elems) +{ + struct sta_info *sta = NULL; + + /* Userspace handles peer allocation when security is enabled */ + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) + cfg80211_notify_new_peer_candidate(sdata->dev, addr, + elems->ie_start, + elems->total_len, + GFP_KERNEL); + else + sta = __mesh_sta_info_alloc(sdata, addr); + + return sta; +} + +/* + * mesh_sta_info_get - return mesh sta info entry for @addr. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame. + * + * Return existing or newly allocated sta_info under RCU read lock. + * (re)initialize with given IEs. + */ +static struct sta_info * +mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, + u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU) +{ + struct sta_info *sta = NULL; + + rcu_read_lock(); + sta = sta_info_get(sdata, addr); + if (sta) { + mesh_sta_info_init(sdata, sta, elems, false); + } else { + rcu_read_unlock(); + /* can't run atomic */ + sta = mesh_sta_info_alloc(sdata, addr, elems); + if (!sta) { + rcu_read_lock(); + return NULL; + } + + mesh_sta_info_init(sdata, sta, elems, true); + + if (sta_info_insert_rcu(sta)) + return NULL; + } + return sta; } +/* + * mesh_neighbour_update - update or initialize new mesh neighbor. + * + * @sdata: local meshif + * @addr: peer's address + * @elems: IEs from beacon or mesh peering frame + * + * Initiates peering if appropriate. + */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *elems) { struct sta_info *sta; + u32 changed = 0; - rcu_read_lock(); - sta = mesh_peer_init(sdata, hw_addr, elems); + sta = mesh_sta_info_get(sdata, hw_addr, elems); if (!sta) goto out; @@ -417,10 +512,12 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks && rssi_threshold_check(sta, sdata)) - mesh_plink_open(sta); + changed = mesh_plink_open(sta); + ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + ieee80211_mbss_info_change_notify(sdata, changed); } static void mesh_plink_timer(unsigned long data) @@ -504,6 +601,13 @@ static void mesh_plink_timer(unsigned long data) #ifdef CONFIG_PM void mesh_plink_quiesce(struct sta_info *sta) { + if (!ieee80211_vif_is_mesh(&sta->sdata->vif)) + return; + + /* no kernel mesh sta timers have been initialized */ + if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + return; + if (del_timer_sync(&sta->plink_timer)) sta->plink_timer_was_running = true; } @@ -526,13 +630,14 @@ static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) add_timer(&sta->plink_timer); } -int mesh_plink_open(struct sta_info *sta) +u32 mesh_plink_open(struct sta_info *sta) { __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 changed; if (!test_sta_flag(sta, WLAN_STA_AUTH)) - return -EPERM; + return 0; spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); @@ -540,7 +645,7 @@ int mesh_plink_open(struct sta_info *sta) if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); - return -EBUSY; + return 0; } sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); @@ -549,13 +654,16 @@ int mesh_plink_open(struct sta_info *sta) "Mesh plink: starting establishment with %pM\n", sta->sta.addr); - return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + /* set the non-peer mode to active during peering */ + changed = ieee80211_mps_local_status_update(sdata); + + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); + return changed; } -void mesh_plink_block(struct sta_info *sta) +u32 mesh_plink_block(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; spin_lock_bh(&sta->lock); @@ -563,12 +671,13 @@ void mesh_plink_block(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - ieee80211_bss_info_change_notify(sdata, changed); + return changed; } -void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - size_t len, struct ieee80211_rx_status *rx_status) +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; struct ieee802_11_elems elems; @@ -581,15 +690,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 *baseaddr; u32 changed = 0; __le16 plid, llid, reason; - static const char *mplstates[] = { - [NL80211_PLINK_LISTEN] = "LISTEN", - [NL80211_PLINK_OPN_SNT] = "OPN-SNT", - [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", - [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", - [NL80211_PLINK_ESTAB] = "ESTAB", - [NL80211_PLINK_HOLDING] = "HOLDING", - [NL80211_PLINK_BLOCKED] = "BLOCKED" - }; /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) @@ -609,13 +709,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m baselen += 4; } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); + if (!elems.peering) { mpl_dbg(sdata, "Mesh plink: missing necessary peer link ie\n"); return; } + if (elems.rsn_len && - sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { mpl_dbg(sdata, "Mesh plink: can't establish link with secure peer\n"); return; @@ -634,7 +736,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - (!elems.mesh_id || !elems.mesh_config)) { + (!elems.mesh_id || !elems.mesh_config)) { mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); return; } @@ -646,6 +748,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); + /* WARNING: Only for sta pointer, is dropped & re-acquired */ rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -749,8 +852,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } if (event == OPN_ACPT) { + rcu_read_unlock(); /* allocate sta entry if necessary and update info */ - sta = mesh_peer_init(sdata, mgmt->sa, &elems); + sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); if (!sta) { mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); @@ -758,11 +862,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } } - mpl_dbg(sdata, - "Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", - mgmt->sa, mplstates[sta->plink_state], - le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), - event); + mpl_dbg(sdata, "peer %pM in state %s got event %s\n", mgmt->sa, + mplstates[sta->plink_state], mplevents[event]); reason = 0; spin_lock_bh(&sta->lock); switch (sta->plink_state) { @@ -780,6 +881,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); + + /* set the non-peer mode to active during peering */ + changed |= ieee80211_mps_local_status_update(sdata); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -870,8 +975,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -905,11 +1014,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, sta->sta.addr, llid, plid, 0); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -928,6 +1041,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; @@ -976,5 +1090,5 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m rcu_read_unlock(); if (changed) - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c new file mode 100644 index 00000000000..3b7bfc01ee3 --- /dev/null +++ b/net/mac80211/mesh_ps.c @@ -0,0 +1,598 @@ +/* + * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> + * Copyright 2012-2013, cozybit Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "mesh.h" +#include "wme.h" + + +/* mesh PS management */ + +/** + * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave + */ +static struct sk_buff *mps_qos_null_get(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *nullfunc; /* use 4addr header */ + struct sk_buff *skb; + int size = sizeof(*nullfunc); + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2); + if (!skb) + return NULL; + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, size); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); + ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr, + sdata->vif.addr); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + /* no address resolution for this frame -> set addr 1 immediately */ + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ + ieee80211_mps_set_frame_flags(sdata, sta, nullfunc); + + return skb; +} + +/** + * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode + */ +static void mps_qos_null_tx(struct sta_info *sta) +{ + struct sk_buff *skb; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n", + sta->sta.addr); + + /* don't unintentionally start a MPSP */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { + u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); + + qc[0] |= IEEE80211_QOS_CTL_EOSP; + } + + ieee80211_tx_skb(sta->sdata, skb); +} + +/** + * ieee80211_mps_local_status_update - track status of local link-specific PMs + * + * @sdata: local mesh subif + * + * sets the non-peer power mode and triggers the driver PS (re-)configuration + * Return BSS_CHANGED_BEACON if a beacon update is necessary. + */ +u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sta_info *sta; + bool peering = false; + int light_sleep_cnt = 0; + int deep_sleep_cnt = 0; + u32 changed = 0; + enum nl80211_mesh_power_mode nonpeer_pm; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata) + continue; + + switch (sta->plink_state) { + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + peering = true; + break; + case NL80211_PLINK_ESTAB: + if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP) + light_sleep_cnt++; + else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP) + deep_sleep_cnt++; + break; + default: + break; + } + } + rcu_read_unlock(); + + /* + * Set non-peer mode to active during peering/scanning/authentication + * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is + * deep sleep if the local STA is in light or deep sleep towards at + * least one mesh peer (see 13.14.3.1). Otherwise, set it to the + * user-configured default value. + */ + if (peering) { + mps_dbg(sdata, "setting non-peer PM to active for peering\n"); + nonpeer_pm = NL80211_MESH_POWER_ACTIVE; + } else if (light_sleep_cnt || deep_sleep_cnt) { + mps_dbg(sdata, "setting non-peer PM to deep sleep\n"); + nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP; + } else { + mps_dbg(sdata, "setting non-peer PM to user value\n"); + nonpeer_pm = ifmsh->mshcfg.power_mode; + } + + /* need update if sleep counts move between 0 and non-zero */ + if (ifmsh->nonpeer_pm != nonpeer_pm || + !ifmsh->ps_peers_light_sleep != !light_sleep_cnt || + !ifmsh->ps_peers_deep_sleep != !deep_sleep_cnt) + changed = BSS_CHANGED_BEACON; + + ifmsh->nonpeer_pm = nonpeer_pm; + ifmsh->ps_peers_light_sleep = light_sleep_cnt; + ifmsh->ps_peers_deep_sleep = deep_sleep_cnt; + + return changed; +} + +/** + * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA + * + * @sta: mesh STA + * @pm: the power mode to set + * Return BSS_CHANGED_BEACON if a beacon update is in order. + */ +u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + + mps_dbg(sdata, "local STA operates in mode %d with %pM\n", + pm, sta->sta.addr); + + sta->local_pm = pm; + + /* + * announce peer-specific power mode transition + * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3) + */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + mps_qos_null_tx(sta); + + return ieee80211_mps_local_status_update(sdata); +} + +/** + * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control) + * + * @sdata: local mesh subif + * @sta: mesh STA + * @hdr: 802.11 frame header + * + * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11 + * + * NOTE: sta must be given when an individually-addressed QoS frame header + * is handled, for group-addressed and management frames it is not used + */ +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc; + + if (WARN_ON(is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + !sta)) + return; + + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + sta->plink_state == NL80211_PLINK_ESTAB) + pm = sta->local_pm; + else + pm = sdata->u.mesh.nonpeer_pm; + + if (pm == NL80211_MESH_POWER_ACTIVE) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + qc = ieee80211_get_qos_ctl(hdr); + + if ((is_unicast_ether_addr(hdr->addr1) && + pm == NL80211_MESH_POWER_DEEP_SLEEP) || + (is_multicast_ether_addr(hdr->addr1) && + sdata->u.mesh.ps_peers_deep_sleep > 0)) + qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); + else + qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); +} + +/** + * ieee80211_mps_sta_status_update - update buffering status of neighbor STA + * + * @sta: mesh STA + * + * called after change of peering status or non-peer/peer-specific power mode + */ +void ieee80211_mps_sta_status_update(struct sta_info *sta) +{ + enum nl80211_mesh_power_mode pm; + bool do_buffer; + + /* + * use peer-specific power mode if peering is established and the + * peer's power mode is known + */ + if (sta->plink_state == NL80211_PLINK_ESTAB && + sta->peer_pm != NL80211_MESH_POWER_UNKNOWN) + pm = sta->peer_pm; + else + pm = sta->nonpeer_pm; + + do_buffer = (pm != NL80211_MESH_POWER_ACTIVE); + + /* Don't let the same PS state be set twice */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer) + return; + + if (do_buffer) { + set_sta_flag(sta, WLAN_STA_PS_STA); + atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps); + mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n", + sta->sta.addr); + } else { + ieee80211_sta_ps_deliver_wakeup(sta); + } + + /* clear the MPSP flags for non-peers or active STA */ + if (sta->plink_state != NL80211_PLINK_ESTAB) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + } else if (!do_buffer) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + } +} + +static void mps_set_sta_peer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc = ieee80211_get_qos_ctl(hdr); + + /* + * Test Power Management field of frame control (PW) and + * mesh power save level subfield of QoS control field (PSL) + * + * | PM | PSL| Mesh PM | + * +----+----+---------+ + * | 0 |Rsrv| Active | + * | 1 | 0 | Light | + * | 1 | 1 | Deep | + */ + if (ieee80211_has_pm(hdr->frame_control)) { + if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_LIGHT_SLEEP; + } else { + pm = NL80211_MESH_POWER_ACTIVE; + } + + if (sta->peer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM enters mode %d\n", + sta->sta.addr, pm); + + sta->peer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +static void mps_set_sta_nonpeer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + + if (ieee80211_has_pm(hdr->frame_control)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_ACTIVE; + + if (sta->nonpeer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n", + sta->sta.addr, pm); + + sta->nonpeer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +/** + * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave + * + * @sta: STA info that transmitted the frame + * @hdr: IEEE 802.11 (QoS) Header + */ +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control)) { + /* + * individually addressed QoS Data/Null frames contain + * peer link-specific PS mode towards the local STA + */ + mps_set_sta_peer_pm(sta, hdr); + + /* check for mesh Peer Service Period trigger frames */ + ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr), + sta, false, false); + } else { + /* + * can only determine non-peer PS mode + * (see IEEE802.11-2012 8.2.4.1.7) + */ + mps_set_sta_nonpeer_pm(sta, hdr); + } +} + + +/* mesh PS frame release */ + +static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + struct ieee80211_tx_info *info; + u8 *qc; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + nullfunc = (struct ieee80211_hdr *) skb->data; + if (!eosp) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + /* + * | RSPI | EOSP | MPSP triggering | + * +------+------+--------------------+ + * | 0 | 0 | local STA is owner | + * | 0 | 1 | no MPSP (MPSP end) | + * | 1 | 0 | both STA are owner | + * | 1 | 1 | peer STA is owner | see IEEE802.11-2012 13.14.9.2 + */ + qc = ieee80211_get_qos_ctl(nullfunc); + if (rspi) + qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8); + if (eosp) + qc[0] |= IEEE80211_QOS_CTL_EOSP; + + info = IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n", + rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr); + + ieee80211_tx_skb(sdata, skb); +} + +/** + * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed + * + * To properly end a mesh MPSP the last transmitted frame has to set the EOSP + * flag in the QoS Control field. In case the current tailing frame is not a + * QoS Data frame, append a QoS Null to carry the flag. + */ +static void mpsp_qos_null_append(struct sta_info *sta, + struct sk_buff_head *frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *new_skb, *skb = skb_peek_tail(frames); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info; + + if (ieee80211_is_data_qos(hdr->frame_control)) + return; + + new_skb = mps_qos_null_get(sta); + if (!new_skb) + return; + + mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n", + sta->sta.addr); + /* + * This frame has to be transmitted last. Assign lowest priority to + * make sure it cannot pass other frames when releasing multiple ACs. + */ + new_skb->priority = 1; + skb_set_queue_mapping(new_skb, IEEE80211_AC_BK); + ieee80211_set_qos_hdr(sdata, new_skb); + + info = IEEE80211_SKB_CB(new_skb); + info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + + __skb_queue_tail(frames, new_skb); +} + +/** + * mps_frame_deliver - transmit frames during mesh powersave + * + * @sta: STA info to transmit to + * @n_frames: number of frames to transmit. -1 for all + */ +static void mps_frame_deliver(struct sta_info *sta, int n_frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int ac; + struct sk_buff_head frames; + struct sk_buff *skb; + bool more_data = false; + + skb_queue_head_init(&frames); + + /* collect frame(s) from buffers */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + while (n_frames != 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + __skb_queue_tail(&frames, skb); + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + more_data = true; + } + + /* nothing to send? -> EOSP */ + if (skb_queue_empty(&frames)) { + mpsp_trigger_send(sta, false, true); + return; + } + + /* in a MPSP make sure the last skb is a QoS Data frame */ + if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mpsp_qos_null_append(sta, &frames); + + mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n", + skb_queue_len(&frames), sta->sta.addr); + + /* prepare collected frames for transmission */ + skb_queue_walk(&frames, skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. + */ + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + + if (more_data || !skb_queue_is_last(&frames, skb)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (skb_queue_is_last(&frames, skb) && + ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qoshdr = ieee80211_get_qos_ctl(hdr); + + /* MPSP trigger frame ends service period */ + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + } + } + + ieee80211_add_pending_skbs(local, &frames); + sta_info_recalc_tim(sta); +} + +/** + * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods + * + * @qc: QoS Control field + * @sta: peer to start a MPSP with + * @tx: frame was transmitted by the local STA + * @acked: frame has been transmitted successfully + * + * NOTE: active mode STA may only serve as MPSP owner + */ +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked) +{ + u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8); + u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP; + + if (tx) { + if (rspi && acked) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + else if (acked && + test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } else { + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } +} + +/** + * ieee80211_mps_frame_release - release buffered frames in response to beacon + * + * @sta: mesh STA + * @elems: beacon IEs + * + * For peers if we have individually-addressed frames buffered or the peer + * indicates buffered frames, send a corresponding MPSP trigger frame. Since + * we do not evaluate the awake window duration, QoS Nulls are used as MPSP + * trigger frames. If the neighbour STA is not a peer, only send single frames. + */ +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems) +{ + int ac, buffer_local = 0; + bool has_buffered = false; + + /* TIM map only for LLID <= IEEE80211_MAX_AID */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, + le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + + if (has_buffered) + mps_dbg(sta->sdata, "%pM indicates buffered frames\n", + sta->sta.addr); + + /* only transmit to PS STA with announced, non-zero awake window */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + return; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac]); + + if (!has_buffered && !buffer_local) + return; + + if (sta->plink_state == NL80211_PLINK_ESTAB) + mpsp_trigger_send(sta, has_buffered, !buffer_local); + else + mps_frame_deliver(sta, 1); +} diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index aa8d1e43738..05a256b38e2 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -43,7 +43,7 @@ struct sync_method { static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) { return (ie->mesh_config->meshconf_cap & - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; } void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) @@ -112,7 +112,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", + sta->sta.addr); goto no_sync; } @@ -129,18 +130,15 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, sta->t_offset = t_t - t_r; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - s64 t_clockdrift = sta->t_offset_setpoint - - sta->t_offset; + s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset; msync_dbg(sdata, "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n", - sta->sta.addr, - (long long) sta->t_offset, - (long long) - sta->t_offset_setpoint, + sta->sta.addr, (long long) sta->t_offset, + (long long) sta->t_offset_setpoint, (long long) t_clockdrift); if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || - t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { + t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { msync_dbg(sdata, "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, @@ -149,15 +147,10 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, goto no_sync; } - rcu_read_unlock(); - spin_lock_bh(&ifmsh->sync_offset_lock); - if (t_clockdrift > - ifmsh->sync_offset_clockdrift_max) - ifmsh->sync_offset_clockdrift_max - = t_clockdrift; + if (t_clockdrift > ifmsh->sync_offset_clockdrift_max) + ifmsh->sync_offset_clockdrift_max = t_clockdrift; spin_unlock_bh(&ifmsh->sync_offset_lock); - } else { sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); @@ -165,9 +158,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, "STA %pM : offset was invalid, sta->t_offset=%lld\n", sta->sta.addr, (long long) sta->t_offset); - rcu_read_unlock(); } - return; no_sync: rcu_read_unlock(); @@ -177,14 +168,12 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - WARN_ON(ifmsh->mesh_sp_id - != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); + WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); spin_lock_bh(&ifmsh->sync_offset_lock); - if (ifmsh->sync_offset_clockdrift_max > - TOFFSET_MINIMUM_ADJUSTMENT) { + if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) { /* Since ajusting the tsf here would * require a possibly blocking call * to the driver tsf setter, we punt @@ -193,8 +182,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) msync_dbg(sdata, "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", ifmsh->sync_offset_clockdrift_max); - set_bit(MESH_WORK_DRIFT_ADJUST, - &ifmsh->wrkq_flags); + set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); ifmsh->adjusting_tbtt = true; } else { @@ -220,14 +208,11 @@ static const struct sync_method sync_methods[] = { const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method) { - const struct ieee80211_mesh_sync_ops *ops = NULL; - u8 i; + int i; for (i = 0 ; i < ARRAY_SIZE(sync_methods); ++i) { - if (sync_methods[i].method == method) { - ops = &sync_methods[i].ops; - break; - } + if (sync_methods[i].method == method) + return &sync_methods[i].ops; } - return ops; + return NULL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5107248af7f..9f6464f3e05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,11 +30,13 @@ #include "rate.h" #include "led.h" -#define IEEE80211_AUTH_TIMEOUT (HZ / 5) -#define IEEE80211_AUTH_MAX_TRIES 3 -#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) -#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) -#define IEEE80211_ASSOC_MAX_TRIES 3 +#define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_AUTH_MAX_TRIES 3 +#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_ASSOC_MAX_TRIES 3 static int max_nullfunc_tries = 2; module_param(max_nullfunc_tries, int, 0644); @@ -112,6 +114,9 @@ enum rx_mgmt_action { /* caller must call cfg80211_send_assoc_timeout() */ RX_MGMT_CFG80211_ASSOC_TIMEOUT, + + /* used when a processed beacon causes a deauth */ + RX_MGMT_CFG80211_TX_DEAUTH, }; /* utils */ @@ -172,79 +177,331 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_ht_operation *ht_oper, - const u8 *bssid, bool reconfig) +static u32 chandef_downgrade(struct cfg80211_chan_def *c) +{ + u32 ret; + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_40: + c->width = NL80211_CHAN_WIDTH_20; + c->center_freq1 = c->chan->center_freq; + ret = IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80: + tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; + c->width = NL80211_CHAN_WIDTH_40; + ret = IEEE80211_STA_DISABLE_VHT; + break; + case NL80211_CHAN_WIDTH_80P80: + c->center_freq2 = 0; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P80 */ + tmp /= 4; + c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; + c->width = NL80211_CHAN_WIDTH_80; + ret = IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ; + break; + default: + case NL80211_CHAN_WIDTH_20_NOHT: + WARN_ON_ONCE(1); + c->width = NL80211_CHAN_WIDTH_20_NOHT; + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; + } + + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); + + return ret; +} + +static u32 +ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct ieee80211_channel *channel, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + struct cfg80211_chan_def *chandef, bool verbose) +{ + struct cfg80211_chan_def vht_chandef; + u32 ht_cfreq, ret; + + chandef->chan = channel; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = channel->center_freq; + chandef->center_freq2 = 0; + + if (!ht_oper || !sband->ht_cap.ht_supported) { + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + chandef->width = NL80211_CHAN_WIDTH_20; + + ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, + channel->band); + /* check that channel matches the right operating channel */ + if (channel->center_freq != ht_cfreq) { + /* + * It's possible that some APs are confused here; + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel in association responses, but + * since we look at probe response/beacon data here + * it should be OK. + */ + if (verbose) + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + goto out; + } + + /* check 40 MHz support, if we have it */ + if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 += 10; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 -= 10; + break; + } + } else { + /* 40 MHz (and 80 MHz) must be supported for VHT */ + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (!vht_oper || !sband->vht_cap.vht_supported) { + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + vht_chandef.chan = channel; + vht_chandef.center_freq1 = + ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, + channel->band); + vht_chandef.center_freq2 = 0; + + if (vht_oper->center_freq_seg2_idx) + vht_chandef.center_freq2 = + ieee80211_channel_to_frequency( + vht_oper->center_freq_seg2_idx, + channel->band); + + switch (vht_oper->chan_width) { + case IEEE80211_VHT_CHANWIDTH_USE_HT: + vht_chandef.width = chandef->width; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_VHT_CHANWIDTH_160MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_160; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + vht_chandef.width = NL80211_CHAN_WIDTH_80P80; + break; + default: + if (verbose) + sdata_info(sdata, + "AP VHT operation IE has invalid channel width (%d), disable VHT\n", + vht_oper->chan_width); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (!cfg80211_chandef_valid(&vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information is invalid, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + if (cfg80211_chandef_identical(chandef, &vht_chandef)) { + ret = 0; + goto out; + } + + if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { + if (verbose) + sdata_info(sdata, + "AP VHT information doesn't match HT, disable VHT\n"); + ret = IEEE80211_STA_DISABLE_VHT; + goto out; + } + + *chandef = vht_chandef; + + ret = 0; + +out: + /* don't print the message below for VHT mismatch if VHT is disabled */ + if (ret & IEEE80211_STA_DISABLE_VHT) + vht_chandef = *chandef; + + while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) { + if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT; + goto out; + } + + ret |= chandef_downgrade(chandef); + } + + if (chandef->width != vht_chandef.width && verbose) + sdata_info(sdata, + "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); + + WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); + return ret; +} + +static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + const struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_vht_operation *vht_oper, + const u8 *bssid, u32 *changed) { struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_supported_band *sband; - struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; - struct sta_info *sta; - u32 changed = 0; + struct cfg80211_chan_def chandef; u16 ht_opmode; - bool disable_40 = false; + u32 flags; + enum ieee80211_sta_rx_bandwidth new_sta_bw; + int ret; - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); + /* if HT was/is disabled, don't track any bandwidth changes */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_HT || !ht_oper) return 0; - } - chan = chanctx_conf->def.chan; - rcu_read_unlock(); + + /* don't check VHT if we associated as non-VHT station */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) + vht_oper = NULL; + + if (WARN_ON_ONCE(!sta)) + return -EINVAL; + + chan = sdata->vif.bss_conf.chandef.chan; sband = local->hw.wiphy->bands[chan->band]; - switch (sdata->vif.bss_conf.chandef.width) { + /* calculate new channel (type) based on HT/VHT operation IEs */ + flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, + vht_oper, &chandef, false); + + /* + * Downgrade the new channel if we associated with restricted + * capabilities. For example, if we associated as a 20 MHz STA + * to a 40 MHz AP (due to regulatory, capabilities or config + * reasons) then switching to a 40 MHz channel now won't do us + * any good -- we couldn't use it with the AP. + */ + if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && + chandef.width == NL80211_CHAN_WIDTH_80P80) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && + chandef.width == NL80211_CHAN_WIDTH_160) + flags |= chandef_downgrade(&chandef); + if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && + chandef.width > NL80211_CHAN_WIDTH_20) + flags |= chandef_downgrade(&chandef); + + if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef)) + return 0; + + sdata_info(sdata, + "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n", + ifmgd->bssid, chandef.chan->center_freq, chandef.width, + chandef.center_freq1, chandef.center_freq2); + + if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_40MHZ | + IEEE80211_STA_DISABLE_80P80MHZ | + IEEE80211_STA_DISABLE_160MHZ)) || + !cfg80211_chandef_valid(&chandef)) { + sdata_info(sdata, + "AP %pM changed bandwidth in a way we can't support - disconnect\n", + ifmgd->bssid); + return -EINVAL; + } + + switch (chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + new_sta_bw = IEEE80211_STA_RX_BW_20; + break; case NL80211_CHAN_WIDTH_40: - if (sdata->vif.bss_conf.chandef.chan->center_freq > - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - disable_40 = true; - if (sdata->vif.bss_conf.chandef.chan->center_freq < - sdata->vif.bss_conf.chandef.center_freq1 && - chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - disable_40 = true; + new_sta_bw = IEEE80211_STA_RX_BW_40; break; - default: + case NL80211_CHAN_WIDTH_80: + new_sta_bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + new_sta_bw = IEEE80211_STA_RX_BW_160; break; + default: + return -EINVAL; } - /* This can change during the lifetime of the BSS */ - if (!(ht_oper->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) - disable_40 = true; - - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, bssid); + if (new_sta_bw > sta->cur_max_bandwidth) + new_sta_bw = sta->cur_max_bandwidth; - WARN_ON_ONCE(!sta); - - if (sta && !sta->supports_40mhz) - disable_40 = true; - - if (sta && (!reconfig || - (disable_40 != !(sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SUP_WIDTH_20_40)))) { + if (new_sta_bw < sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + } - if (disable_40) - sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - else - sta->sta.ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed); + if (ret) { + sdata_info(sdata, + "AP %pM changed bandwidth to incompatible one - disconnect\n", + ifmgd->bssid); + return ret; + } + if (new_sta_bw > sta->sta.bandwidth) { + sta->sta.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); } - mutex_unlock(&local->sta_mtx); ht_opmode = le16_to_cpu(ht_oper->operation_mode); /* if bss configuration changed store the new one */ - if (!reconfig || (sdata->vif.bss_conf.ht_operation_mode != ht_opmode)) { - changed |= BSS_CHANGED_HT; + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } - return changed; + return 0; } /* frame sending functions */ @@ -341,11 +598,13 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_supported_band *sband) + struct ieee80211_supported_band *sband, + struct ieee80211_vht_cap *ap_vht_cap) { u8 *pos; u32 cap; struct ieee80211_sta_vht_cap vht_cap; + int i; BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); @@ -364,6 +623,42 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } + /* + * Some APs apparently get confused if our capabilities are better + * than theirs, so restrict what we advertise in the assoc request. + */ + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) + cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC))) + cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4); + + for (i = 0; i < 8; i++) { + int shift = i * 2; + u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift; + u16 ap_mcs, our_mcs; + + ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) & + mask) >> shift; + our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) & + mask) >> shift; + + switch (ap_mcs) { + default: + if (our_mcs <= ap_mcs) + break; + /* fall through */ + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask); + vht_cap.vht_mcs.rx_mcs_map |= + cpu_to_le16(ap_mcs << shift); + } + } + /* reserve and fill IE */ pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); @@ -562,7 +857,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) sband, chan, sdata->smps_mode); if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) - ieee80211_add_vht_ie(sdata, skb, sband); + ieee80211_add_vht_ie(sdata, skb, sband, + &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ if (assoc_data->ie_len && assoc_data->ie) { @@ -605,6 +901,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) drv_mgd_prepare_tx(local, sdata); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_tx_skb(sdata, skb); } @@ -641,7 +940,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, if (powersave) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; @@ -745,10 +1045,10 @@ static void ieee80211_chswitch_timer(unsigned long data) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp) +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); @@ -907,39 +1207,6 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, return 0; } -void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = false; - conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout; -} -EXPORT_SYMBOL(ieee80211_enable_dyn_ps); - -void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; - - WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || - !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || - (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); - - local->disable_dynamic_ps = true; - conf->dynamic_ps_timeout = 0; - del_timer_sync(&local->dynamic_ps_timer); - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_enable_work); -} -EXPORT_SYMBOL(ieee80211_disable_dyn_ps); - /* powersave */ static void ieee80211_enable_ps(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) @@ -1042,7 +1309,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && ieee80211_powersave_allowed(found)) { - struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; if (latency < 0) @@ -1066,10 +1332,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) else timeout = 100; } - local->dynamic_ps_user_timeout = timeout; - if (!local->disable_dynamic_ps) - conf->dynamic_ps_timeout = - local->dynamic_ps_user_timeout; + local->hw.conf.dynamic_ps_timeout = timeout; if (beaconint_us > latency) { local->ps_sdata = NULL; @@ -1139,8 +1402,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (!local->disable_dynamic_ps && - local->hw.conf.dynamic_ps_timeout > 0) { + if (local->hw.conf.dynamic_ps_timeout > 0) { /* don't enter PS if TX frames are pending */ if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + @@ -1205,16 +1467,30 @@ void ieee80211_dynamic_ps_timer(unsigned long data) ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } +void ieee80211_dfs_cac_timer_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct ieee80211_sub_if_data *sdata = + container_of(delayed_work, struct ieee80211_sub_if_data, + dfs_cac_timer_work); + + ieee80211_vif_release_channel(sdata); + + cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); +} + /* MLME */ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u8 *wmm_param, size_t wmm_param_len) + const u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; - u8 *pos, uapsd_queues = 0; + const u8 *pos; + u8 uapsd_queues = 0; if (!local->ops->conf_tx) return false; @@ -1406,7 +1682,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { + if (sdata->u.mgd.assoc_data->have_beacon) { /* * If the AP is buggy we may get here with no DTIM period * known, so assume it's 1 which is the only safe assumption @@ -1414,6 +1690,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. */ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; } else { bss_conf->dtim_period = 0; } @@ -1426,10 +1703,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_CQM; /* Enable ARP filtering */ - if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { - bss_conf->arp_filter_enabled = sdata->arp_filter_state; + if (bss_conf->arp_addr_cnt) bss_info_changed |= BSS_CHANGED_ARP_FILTER; - } ieee80211_bss_info_change_notify(sdata, bss_info_changed); @@ -1450,7 +1725,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; u32 changed = 0; ASSERT_MGD_MTX(ifmgd); @@ -1482,14 +1756,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) { - set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); - } - mutex_unlock(&local->sta_mtx); - /* * if we want to get out of ps before disassoc (why?) we have * to do it before sending disassoc, as otherwise the null-packet @@ -1521,7 +1787,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush(local, sdata); + sta_info_flush_defer(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1543,10 +1809,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&local->dynamic_ps_enable_work); /* Disable ARP filtering */ - if (sdata->vif.bss_conf.arp_filter_enabled) { - sdata->vif.bss_conf.arp_filter_enabled = false; + if (sdata->vif.bss_conf.arp_addr_cnt) changed |= BSS_CHANGED_ARP_FILTER; - } sdata->vif.bss_conf.qos = false; changed |= BSS_CHANGED_QOS; @@ -1629,17 +1893,18 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ack) - ieee80211_sta_reset_conn_monitor(sdata); - if (ieee80211_is_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if (ack) - sdata->u.mgd.probe_send_count = 0; + ieee80211_sta_reset_conn_monitor(sdata); else sdata->u.mgd.nullfunc_failed = true; ieee80211_queue_work(&sdata->local->hw, &sdata->work); + return; } + + if (ack) + ieee80211_sta_reset_conn_monitor(sdata); } static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) @@ -1680,7 +1945,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ssid_len = ssid[1]; ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL, - 0, (u32) -1, true, false, + 0, (u32) -1, true, 0, ifmgd->associated->channel, false); rcu_read_unlock(); } @@ -1714,7 +1979,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (beacon) mlme_dbg_ratelimited(sdata, - "detected beacon loss from AP - sending probe request\n"); + "detected beacon loss from AP - probing\n"); ieee80211_cqm_rssi_notify(&sdata->vif, NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); @@ -1795,11 +2060,9 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ap_probereq_get); -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, - bool transmit_frame) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; mutex_lock(&ifmgd->mtx); @@ -1810,8 +2073,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - transmit_frame, frame_buf); + true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&ifmgd->mtx); /* @@ -1819,10 +2084,6 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata, * but that's not a problem. */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -1841,10 +2102,10 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) rcu_read_unlock(); } - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) { + if (ifmgd->connection_loss) { sdata_info(sdata, "Connection to AP %pM lost\n", ifmgd->bssid); - __ieee80211_disconnect(sdata, false); + __ieee80211_disconnect(sdata); } else { ieee80211_mgd_probe_ap(sdata, true); } @@ -1856,9 +2117,7 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.csa_connection_drop_work); - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - __ieee80211_disconnect(sdata, true); + __ieee80211_disconnect(sdata); } void ieee80211_beacon_loss(struct ieee80211_vif *vif) @@ -1869,6 +2128,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) trace_api_beacon_loss(sdata); WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR); + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); @@ -1880,7 +2140,7 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) trace_api_connection_loss(sdata); - WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR)); + sdata->u.mgd.connection_loss = true; ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); } EXPORT_SYMBOL(ieee80211_connection_loss); @@ -1902,7 +2162,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, ieee80211_vif_release_channel(sdata); } - cfg80211_put_bss(auth_data->bss); + cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); kfree(auth_data); sdata->u.mgd.auth_data = NULL; } @@ -1910,9 +2170,11 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; u8 *pos; struct ieee802_11_elems elems; + u32 tx_flags = 0; pos = mgmt->u.auth.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); @@ -1920,11 +2182,14 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, return; auth_data->expected_transaction = 4; drv_mgd_prepare_tx(sdata->local, sdata); + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0, elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, auth_data->key, auth_data->key_len, - auth_data->key_idx); + auth_data->key_idx, tx_flags); } static enum rx_mgmt_action __must_check @@ -1991,6 +2256,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && @@ -2049,10 +2315,6 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DEAUTH; } @@ -2080,10 +2342,6 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return RX_MGMT_CFG80211_DISASSOC; } @@ -2193,6 +2451,24 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ifmgd->aid = aid; + /* + * We previously checked these in the beacon/probe response, so + * they should be present here. This is just a safety net. + */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && + (!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) { + sdata_info(sdata, + "HT AP is missing WMM params or HT capability/operation in AssocResp\n"); + return false; + } + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + (!elems.vht_cap_elem || !elems.vht_operation)) { + sdata_info(sdata, + "VHT AP is missing VHT capability/operation in AssocResp\n"); + return false; + } + mutex_lock(&sdata->local->sta_mtx); /* * station info was already allocated and inserted before @@ -2206,17 +2482,36 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + /* Set up internal HT/VHT capabilities */ if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, - elems.ht_cap_elem, &sta->sta.ht_cap); - - sta->supports_40mhz = - sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40; + elems.ht_cap_elem, sta); if (elems.vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - elems.vht_cap_elem, - &sta->sta.vht_cap); + elems.vht_cap_elem, sta); + + /* + * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data + * in their association response, so ignore that data for our own + * configuration. If it changed since the last beacon, we'll get the + * next beacon and update then. + */ + + /* + * If an operating mode notification IE is present, override the + * NSS calculation (that would be done in rate_control_rate_init()) + * and use the # of streams from that element. + */ + if (elems.opmode_notif && + !(*elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)) { + u8 nss; + + nss = *elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + sta->sta.rx_nss = nss; + } rate_control_rate_init(sta); @@ -2226,9 +2521,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (elems.wmm_param) set_sta_flag(sta, WLAN_STA_WME); - err = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (!err) - err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + err = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); if (err) { @@ -2257,11 +2550,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, false); changed |= BSS_CHANGED_QOS; - if (elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - cbss->bssid, false); - /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; @@ -2335,6 +2623,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", mgmt->sa, tu, ms); assoc_data->timeout = jiffies + msecs_to_jiffies(ms); + assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, assoc_data->timeout); return RX_MGMT_NONE; @@ -2350,7 +2639,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(*bss); + cfg80211_put_bss(sdata->local->hw.wiphy, *bss); return RX_MGMT_CFG80211_ASSOC_TIMEOUT; } sdata_info(sdata, "associated\n"); @@ -2369,8 +2658,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; int freq; @@ -2387,7 +2675,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; if (elems->tim && !elems->parse_error) { - struct ieee80211_tim_ie *tim_ie = elems->tim; + const struct ieee80211_tim_ie *tim_ie = elems->tim; sdata->u.mgd.dtim_period = tim_ie->dtim_period; } } @@ -2404,7 +2692,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); @@ -2447,7 +2735,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) @@ -2459,6 +2747,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "direct probe responded\n"); ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; + ifmgd->auth_data->timeout_started = true; run_again(ifmgd, ifmgd->auth_data->timeout); } } @@ -2484,10 +2773,10 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) +static enum rx_mgmt_action +ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + u8 *deauth_buf, struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2496,6 +2785,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; + struct sta_info *sta; u32 changed = 0; bool erp_valid; u8 erp_value = 0; @@ -2507,40 +2797,51 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return; + return RX_MGMT_NONE; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return; + return RX_MGMT_NONE; } chan = chanctx_conf->def.chan; rcu_read_unlock(); - if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon && + if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) { ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - false); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); ifmgd->assoc_data->have_beacon = true; - ifmgd->assoc_data->sent_assoc = false; + ifmgd->assoc_data->need_beacon = false; + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; + ifmgd->assoc_data->timeout_started = true; run_again(ifmgd, ifmgd->assoc_data->timeout); - return; + return RX_MGMT_NONE; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return; + return RX_MGMT_NONE; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -2571,12 +2872,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sig > ifmgd->rssi_max_thold && (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_HIGH); + drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH); } else if (sig < ifmgd->rssi_min_thold && (last_sig >= ifmgd->rssi_max_thold || last_sig == 0)) { ifmgd->last_ave_beacon_signal = sig; - drv_rssi_callback(local, RSSI_EVENT_LOW); + drv_rssi_callback(local, sdata, RSSI_EVENT_LOW); } } @@ -2606,7 +2907,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { mlme_dbg_ratelimited(sdata, - "cancelling probereq poll due to a received beacon\n"); + "cancelling AP probe due to a received beacon\n"); mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; ieee80211_run_deferred_scan(local); @@ -2678,17 +2979,42 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return; + return RX_MGMT_NONE; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, - true); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len)) changed |= BSS_CHANGED_QOS; + /* + * If we haven't had a beacon before, tell the driver about the + * DTIM period (and beacon timing if desired) now. + */ + if (!bss_conf->dtim_period) { + /* a few bogus AP send dtim_period = 0 or no TIM IE */ + if (elems.tim) + bss_conf->dtim_period = elems.tim->dtim_period ?: 1; + else + bss_conf->dtim_period = 1; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } + + changed |= BSS_CHANGED_DTIM_PERIOD; + } + if (elems.erp_info && elems.erp_info_len >= 1) { erp_valid = true; erp_value = elems.erp_info[0]; @@ -2699,11 +3025,22 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, bssid); - if (elems.ht_cap_elem && elems.ht_operation && elems.wmm_param && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) - changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation, - bssid, true); + if (ieee80211_config_bw(sdata, sta, elems.ht_operation, + elems.vht_operation, bssid, &changed)) { + mutex_unlock(&local->sta_mtx); + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + true, deauth_buf); + return RX_MGMT_CFG80211_TX_DEAUTH; + } + + if (sta && elems.opmode_notif) + ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif, + rx_status->band, true); + mutex_unlock(&local->sta_mtx); if (elems.country_elem && elems.pwr_constr_elem && mgmt->u.probe_resp.capab_info & @@ -2714,6 +3051,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); + + return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -2724,6 +3063,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss = NULL; enum rx_mgmt_action rma = RX_MGMT_NONE; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -2734,7 +3074,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); + rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + deauth_buf, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); @@ -2783,6 +3124,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case RX_MGMT_CFG80211_ASSOC_TIMEOUT: cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); break; + case RX_MGMT_CFG80211_TX_DEAUTH: + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + break; default: WARN(1, "unexpected: %d", rma); } @@ -2804,14 +3149,13 @@ static void ieee80211_sta_timer(unsigned long data) } static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 reason) + u8 *bssid, u8 reason, bool tx) { - struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, - false, frame_buf); + tx, frame_buf); mutex_unlock(&ifmgd->mtx); /* @@ -2820,10 +3164,6 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); - mutex_lock(&ifmgd->mtx); } @@ -2832,12 +3172,17 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; + u32 tx_flags = 0; lockdep_assert_held(&ifmgd->mtx); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; + auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { @@ -2874,7 +3219,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, auth_data->data, auth_data->data_len, auth_data->bss->bssid, - auth_data->bss->bssid, NULL, 0, 0); + auth_data->bss->bssid, NULL, 0, 0, + tx_flags); } else { const u8 *ssidie; @@ -2893,13 +3239,18 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, false, + NULL, 0, (u32) -1, true, tx_flags, auth_data->bss->channel, false); rcu_read_unlock(); } - auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - run_again(ifmgd, auth_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; + ifmgd->auth_data->timeout_started = true; + run_again(ifmgd, auth_data->timeout); + } else { + auth_data->timeout_started = false; + } return 0; } @@ -2930,12 +3281,29 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); - assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; - run_again(&sdata->u.mgd, assoc_data->timeout); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; + assoc_data->timeout_started = true; + run_again(&sdata->u.mgd, assoc_data->timeout); + } else { + assoc_data->timeout_started = false; + } return 0; } +void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, + __le16 fc, bool acked) +{ + struct ieee80211_local *local = sdata->local; + + sdata->u.mgd.status_fc = fc; + sdata->u.mgd.status_acked = acked; + sdata->u.mgd.status_received = true; + + ieee80211_queue_work(&local->hw, &sdata->work); +} + void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -2943,7 +3311,36 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) mutex_lock(&ifmgd->mtx); - if (ifmgd->auth_data && + if (ifmgd->status_received) { + __le16 fc = ifmgd->status_fc; + bool status_acked = ifmgd->status_acked; + + ifmgd->status_received = false; + if (ifmgd->auth_data && + (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) { + if (status_acked) { + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; + run_again(ifmgd, ifmgd->auth_data->timeout); + } else { + ifmgd->auth_data->timeout = jiffies - 1; + } + ifmgd->auth_data->timeout_started = true; + } else if (ifmgd->assoc_data && + (ieee80211_is_assoc_req(fc) || + ieee80211_is_reassoc_req(fc))) { + if (status_acked) { + ifmgd->assoc_data->timeout = + jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; + run_again(ifmgd, ifmgd->assoc_data->timeout); + } else { + ifmgd->assoc_data->timeout = jiffies - 1; + } + ifmgd->assoc_data->timeout_started = true; + } + } + + if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { if (ifmgd->auth_data->done) { /* @@ -2962,12 +3359,13 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_auth_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->auth_data) + } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(ifmgd, ifmgd->auth_data->timeout); - if (ifmgd->assoc_data && + if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if (!ifmgd->assoc_data->have_beacon || + if ((ifmgd->assoc_data->need_beacon && + !ifmgd->assoc_data->have_beacon) || ieee80211_do_assoc(sdata)) { u8 bssid[ETH_ALEN]; @@ -2979,7 +3377,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) cfg80211_send_assoc_timeout(sdata->dev, bssid); mutex_lock(&ifmgd->mtx); } - } else if (ifmgd->assoc_data) + } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(ifmgd, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | @@ -3010,7 +3408,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "No ack for nullfunc frame to AP %pM, disconnecting.\n", bssid); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); @@ -3019,7 +3418,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } else if (ifmgd->probe_send_count < max_tries) { mlme_dbg(sdata, "No probe response from AP %pM after %dms, try %d/%i\n", @@ -3038,15 +3437,11 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } mutex_unlock(&ifmgd->mtx); - - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3058,6 +3453,7 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); } @@ -3133,23 +3529,23 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (!ifmgd->associated) + mutex_lock(&ifmgd->mtx); + if (!ifmgd->associated) { + mutex_unlock(&ifmgd->mtx); return; + } if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { - mlme_dbg(sdata, - "driver requested disconnect after resume\n"); - ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, - WLAN_REASON_UNSPECIFIED); - mutex_unlock(&ifmgd->mtx); - return; - } + mlme_dbg(sdata, "driver requested disconnect after resume\n"); + ieee80211_sta_connection_lost(sdata, + ifmgd->associated->bssid, + WLAN_REASON_UNSPECIFIED, + true); mutex_unlock(&ifmgd->mtx); + return; } + mutex_unlock(&ifmgd->mtx); if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) add_timer(&ifmgd->timer); @@ -3225,201 +3621,6 @@ int ieee80211_max_network_latency(struct notifier_block *nb, return 0; } -static u32 chandef_downgrade(struct cfg80211_chan_def *c) -{ - u32 ret; - int tmp; - - switch (c->width) { - case NL80211_CHAN_WIDTH_20: - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_40: - c->width = NL80211_CHAN_WIDTH_20; - c->center_freq1 = c->chan->center_freq; - ret = IEEE80211_STA_DISABLE_40MHZ | - IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80: - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; - c->width = NL80211_CHAN_WIDTH_40; - ret = IEEE80211_STA_DISABLE_VHT; - break; - case NL80211_CHAN_WIDTH_80P80: - c->center_freq2 = 0; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - case NL80211_CHAN_WIDTH_160: - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P80 */ - tmp /= 4; - c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; - c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_STA_DISABLE_80P80MHZ | - IEEE80211_STA_DISABLE_160MHZ; - break; - default: - case NL80211_CHAN_WIDTH_20_NOHT: - WARN_ON_ONCE(1); - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - break; - } - - WARN_ON_ONCE(!cfg80211_chandef_valid(c)); - - return ret; -} - -static u32 -ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_channel *channel, - const struct ieee80211_ht_operation *ht_oper, - const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef) -{ - struct cfg80211_chan_def vht_chandef; - u32 ht_cfreq, ret; - - chandef->chan = channel; - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = channel->center_freq; - chandef->center_freq2 = 0; - - if (!ht_oper || !sband->ht_cap.ht_supported) { - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - chandef->width = NL80211_CHAN_WIDTH_20; - - ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, - channel->band); - /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { - /* - * It's possible that some APs are confused here; - * Netgear WNDR3700 sometimes reports 4 higher than - * the actual channel in association responses, but - * since we look at probe response/beacon data here - * it should be OK. - */ - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; - } - - /* check 40 MHz support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { - switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 += 10; - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - chandef->width = NL80211_CHAN_WIDTH_40; - chandef->center_freq1 -= 10; - break; - } - } else { - /* 40 MHz (and 80 MHz) must be supported for VHT */ - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!vht_oper || !sband->vht_cap.vht_supported) { - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - vht_chandef.chan = channel; - vht_chandef.center_freq1 = - ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx, - channel->band); - vht_chandef.center_freq2 = 0; - - if (vht_oper->center_freq_seg2_idx) - vht_chandef.center_freq2 = - ieee80211_channel_to_frequency( - vht_oper->center_freq_seg2_idx, - channel->band); - - switch (vht_oper->chan_width) { - case IEEE80211_VHT_CHANWIDTH_USE_HT: - vht_chandef.width = chandef->width; - break; - case IEEE80211_VHT_CHANWIDTH_80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_VHT_CHANWIDTH_160MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_160; - break; - case IEEE80211_VHT_CHANWIDTH_80P80MHZ: - vht_chandef.width = NL80211_CHAN_WIDTH_80P80; - break; - default: - sdata_info(sdata, - "AP VHT operation IE has invalid channel width (%d), disable VHT\n", - vht_oper->chan_width); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (!cfg80211_chandef_valid(&vht_chandef)) { - sdata_info(sdata, - "AP VHT information is invalid, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - if (cfg80211_chandef_identical(chandef, &vht_chandef)) { - ret = 0; - goto out; - } - - if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - sdata_info(sdata, - "AP VHT information doesn't match HT, disable VHT\n"); - ret = IEEE80211_STA_DISABLE_VHT; - goto out; - } - - *chandef = vht_chandef; - - ret = 0; - -out: - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { - ret = IEEE80211_STA_DISABLE_HT | - IEEE80211_STA_DISABLE_VHT; - goto out; - } - - ret |= chandef_downgrade(chandef); - } - - if (chandef->width != vht_chandef.width) - sdata_info(sdata, - "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); - - WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); - return ret; -} - static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { @@ -3485,16 +3686,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && sband->ht_cap.ht_supported) { - const u8 *ht_oper_ie; + const u8 *ht_oper_ie, *ht_cap; ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper)) ht_oper = (void *)(ht_oper_ie + 2); + + ht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY); + if (!ht_cap || ht_cap[1] < sizeof(struct ieee80211_ht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ht_oper = NULL; + } } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && sband->vht_cap.vht_supported) { - const u8 *vht_oper_ie; + const u8 *vht_oper_ie, *vht_cap; vht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_OPERATION); @@ -3504,15 +3711,21 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, vht_oper = NULL; sdata_info(sdata, "AP advertised VHT without HT, disabling both\n"); - sdata->flags |= IEEE80211_STA_DISABLE_HT; - sdata->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } + + vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); + if (!vht_cap || vht_cap[1] < sizeof(struct ieee80211_vht_cap)) { + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + vht_oper = NULL; } } ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef); + &chandef, true); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); @@ -3562,15 +3775,12 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - mutex_lock(&local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&local->mtx); - if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_supported_band *sband; + const struct cfg80211_bss_ies *ies; sband = local->hw.wiphy->bands[cbss->channel->band]; @@ -3614,8 +3824,34 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, /* set timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.sync_tsf = cbss->tsf; - sdata->vif.bss_conf.sync_device_ts = bss->device_ts; + rcu_read_lock(); + ies = rcu_dereference(cbss->beacon_ies); + if (ies) { + const u8 *tim_ie; + + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + ies->data, ies->len); + if (tim_ie && tim_ie[1] >= 2) + sdata->vif.bss_conf.sync_dtim_count = tim_ie[2]; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } else if (!(local->hw.flags & + IEEE80211_HW_TIMING_BEACON_ONLY)) { + ies = rcu_dereference(cbss->proberesp_ies); + /* must be non-NULL since beacon IEs were NULL */ + sdata->vif.bss_conf.sync_tsf = ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_presp; + sdata->vif.bss_conf.sync_dtim_count = 0; + } else { + sdata->vif.bss_conf.sync_tsf = 0; + sdata->vif.bss_conf.sync_device_ts = 0; + sdata->vif.bss_conf.sync_dtim_count = 0; + } + rcu_read_unlock(); /* tell driver about BSSID, basic rates and timing */ ieee80211_bss_info_change_notify(sdata, @@ -3735,7 +3971,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, } /* hold our own reference */ - cfg80211_ref_bss(auth_data->bss); + cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); err = 0; goto out_unlock; @@ -3758,8 +3994,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_mgd_assoc_data *assoc_data; + const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; - const u8 *ssidie, *ht_ie; + const u8 *ssidie, *ht_ie, *vht_ie; int i, err; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); @@ -3878,6 +4115,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; else ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); + if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) + memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, + sizeof(struct ieee80211_vht_cap)); + else + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && @@ -3917,40 +4160,48 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; - if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) { - const struct cfg80211_bss_ies *beacon_ies; + rcu_read_lock(); + beacon_ies = rcu_dereference(req->bss->beacon_ies); - rcu_read_lock(); - beacon_ies = rcu_dereference(req->bss->beacon_ies); - if (!beacon_ies) { - /* - * Wait up to one beacon interval ... - * should this be more if we miss one? - */ - sdata_info(sdata, "waiting for beacon from %pM\n", - ifmgd->bssid); - assoc_data->timeout = - TU_TO_EXP_TIME(req->bss->beacon_interval); - } else { - const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, - beacon_ies->data, - beacon_ies->len); - if (tim_ie && tim_ie[1] >= - sizeof(struct ieee80211_tim_ie)) { - const struct ieee80211_tim_ie *tim; - tim = (void *)(tim_ie + 2); - ifmgd->dtim_period = tim->dtim_period; - } - assoc_data->have_beacon = true; - assoc_data->sent_assoc = false; - assoc_data->timeout = jiffies; + if (sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC && + !beacon_ies) { + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? + */ + sdata_info(sdata, "waiting for beacon from %pM\n", + ifmgd->bssid); + assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); + assoc_data->timeout_started = true; + assoc_data->need_beacon = true; + } else if (beacon_ies) { + const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, + beacon_ies->data, + beacon_ies->len); + u8 dtim_count = 0; + + if (tim_ie && tim_ie[1] >= sizeof(struct ieee80211_tim_ie)) { + const struct ieee80211_tim_ie *tim; + tim = (void *)(tim_ie + 2); + ifmgd->dtim_period = tim->dtim_period; + dtim_count = tim->dtim_count; } - rcu_read_unlock(); - } else { assoc_data->have_beacon = true; - assoc_data->sent_assoc = false; assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; + + if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { + sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf; + sdata->vif.bss_conf.sync_device_ts = + bss->device_ts_beacon; + sdata->vif.bss_conf.sync_dtim_count = dtim_count; + } + } else { + assoc_data->timeout = jiffies; + assoc_data->timeout_started = true; } + rcu_read_unlock(); + run_again(ifmgd, assoc_data->timeout); if (bss->corrupt_data) { @@ -4017,10 +4268,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); out: - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - if (sent_frame) __cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -4061,10 +4308,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, __cfg80211_send_disassoc(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - mutex_lock(&sdata->local->mtx); - ieee80211_recalc_idle(sdata->local); - mutex_unlock(&sdata->local->mtx); - return 0; } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index a3ad4c3c80a..cc79b4a2e82 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -113,6 +113,15 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * notify the AP about us leaving the channel and stop all * STA interfaces. */ + + /* + * Stop queues and transmit all frames queued by the driver + * before sending nullfunc to enable powersave at the AP. + */ + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); + drv_flush(local, false); + mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -125,18 +134,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); /* Check to see if we should disable beaconing. */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.bss_conf.enable_beacon) { + set_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state); + sdata->vif.bss_conf.enable_beacon = false; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - netif_tx_stop_all_queues(sdata->dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); } + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata); } mutex_unlock(&local->iflist_mtx); } @@ -164,27 +172,17 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - /* - * This may wake up queues even though the driver - * currently has them stopped. This is not very - * likely, since the driver won't have gotten any - * (or hardly any) new packets while we weren't - * on the right channel, and even if it happens - * it will at most lead to queueing up one more - * packet per queue in mac80211 rather than on - * the interface qdisc. - */ - netif_tx_wake_all_queues(sdata->dev); - } - - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + if (test_and_clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, + &sdata->state)) { + sdata->vif.bss_conf.enable_beacon = true; ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); + } } mutex_unlock(&local->iflist_mtx); + + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); } void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 79a48f37d40..d0275f34bf7 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -7,25 +7,23 @@ #include "led.h" /* return value indicates whether the driver should be further notified */ -static bool ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) +static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) { switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_quiesce(sdata); - return true; + break; case NL80211_IFTYPE_MESH_POINT: ieee80211_mesh_quiesce(sdata); - return true; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* don't tell driver about this */ - return false; + break; default: - return true; + break; } + + cancel_work_sync(&sdata->work); } int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) @@ -40,11 +38,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_scan_cancel(local); + ieee80211_dfs_cac_cancel(local); + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); } mutex_unlock(&local->sta_mtx); } @@ -94,10 +95,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(err != 1); local->wowlan = false; } else { - list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); - ieee80211_quiesce(sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) + if (ieee80211_sdata_running(sdata)) + ieee80211_quiesce(sdata); goto suspend; } } @@ -124,17 +124,43 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - cancel_work_sync(&sdata->work); + static u8 zero_addr[ETH_ALEN] = {}; + u32 changed = 0; - if (!ieee80211_quiesce(sdata)) + if (!ieee80211_sdata_running(sdata)) continue; - if (!ieee80211_sdata_running(sdata)) + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* skip these */ continue; + case NL80211_IFTYPE_STATION: + if (sdata->vif.bss_conf.assoc) + changed = BSS_CHANGED_ASSOC | + BSS_CHANGED_BSSID | + BSS_CHANGED_IDLE; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + if (sdata->vif.bss_conf.enable_beacon) + changed = BSS_CHANGED_BEACON_ENABLED; + break; + default: + break; + } - /* disable beaconing */ - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BEACON_ENABLED); + ieee80211_quiesce(sdata); + + sdata->suspend_bss_conf = sdata->vif.bss_conf; + memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf)); + sdata->vif.bss_conf.idle = true; + if (sdata->suspend_bss_conf.bssid) + sdata->vif.bss_conf.bssid = zero_addr; + + /* disable beaconing or remove association */ + ieee80211_bss_info_change_notify(sdata, changed); if (sdata->vif.type == NL80211_IFTYPE_AP && rcu_access_pointer(sdata->u.ap.beacon)) @@ -204,3 +230,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery. */ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 301386dabf8..d35a5dd3fb1 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -68,6 +68,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; rcu_read_unlock(); + ieee80211_sta_set_rx_nss(sta); + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 8c5acdc0622..eea45a2c7c3 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -494,6 +494,33 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) kfree(mi); } +static void +minstrel_init_cck_rates(struct minstrel_priv *mp) +{ + static const int bitrates[4] = { 10, 20, 55, 110 }; + struct ieee80211_supported_band *sband; + int i, j; + + sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; + if (!sband) + return; + + for (i = 0, j = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + + if (rate->flags & IEEE80211_RATE_ERP_G) + continue; + + for (j = 0; j < ARRAY_SIZE(bitrates); j++) { + if (rate->bitrate != bitrates[j]) + continue; + + mp->cck_rates[j] = i; + break; + } + } +} + static void * minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { @@ -539,6 +566,8 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); #endif + minstrel_init_cck_rates(mp); + return mp; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 5d278eccaef..5ecf757817f 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -79,6 +79,8 @@ struct minstrel_priv { unsigned int lookaround_rate; unsigned int lookaround_rate_mrr; + u8 cck_rates[4]; + #ifdef CONFIG_MAC80211_DEBUGFS /* * enable fixed rate processing per RC diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9f9c453bc45..3af141c6971 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -63,6 +63,30 @@ } \ } +#define CCK_DURATION(_bitrate, _short, _len) \ + (10 /* SIFS */ + \ + (_short ? 72 + 24 : 144 + 48 ) + \ + (8 * (_len + 4) * 10) / (_bitrate)) + +#define CCK_ACK_DURATION(_bitrate, _short) \ + (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \ + CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE)) + +#define CCK_DURATION_LIST(_short) \ + CCK_ACK_DURATION(10, _short), \ + CCK_ACK_DURATION(20, _short), \ + CCK_ACK_DURATION(55, _short), \ + CCK_ACK_DURATION(110, _short) + +#define CCK_GROUP \ + [MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS] = { \ + .streams = 0, \ + .duration = { \ + CCK_DURATION_LIST(false), \ + CCK_DURATION_LIST(true) \ + } \ + } + /* * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they @@ -95,8 +119,13 @@ const struct mcs_group minstrel_mcs_groups[] = { #if MINSTREL_MAX_STREAMS >= 3 MCS_GROUP(3, 1, 1), #endif + + /* must be last */ + CCK_GROUP }; +#define MINSTREL_CCK_GROUP (ARRAY_SIZE(minstrel_mcs_groups) - 1) + static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; /* @@ -119,6 +148,29 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } +static struct minstrel_rate_stats * +minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate) +{ + int group, idx; + + if (rate->flags & IEEE80211_TX_RC_MCS) { + group = minstrel_ht_get_group_idx(rate); + idx = rate->idx % MCS_GROUP_RATES; + } else { + group = MINSTREL_CCK_GROUP; + + for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) + if (rate->idx == mp->cck_rates[idx]) + break; + + /* short preamble */ + if (!(mi->groups[group].supported & BIT(idx))) + idx += 4; + } + return &mi->groups[group].rates[idx]; +} + static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { @@ -159,7 +211,7 @@ static void minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) { struct minstrel_rate_stats *mr; - unsigned int usecs; + unsigned int usecs = 0; mr = &mi->groups[group].rates[rate]; @@ -168,7 +220,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) return; } - usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + if (group != MINSTREL_CCK_GROUP) + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); } @@ -231,10 +285,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mr->cur_tp) continue; - /* ignore the lowest rate of each single-stream group */ - if (!i && minstrel_mcs_groups[group].streams == 1) - continue; - if ((mr->cur_tp > cur_prob_tp && mr->probability > MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { mg->max_prob_rate = index; @@ -297,7 +347,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } static bool -minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rate) { if (rate->idx < 0) return false; @@ -305,7 +355,13 @@ minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) if (!rate->count) return false; - return !!(rate->flags & IEEE80211_TX_RC_MCS); + if (rate->flags & IEEE80211_TX_RC_MCS) + return true; + + return rate->idx == mp->cck_rates[0] || + rate->idx == mp->cck_rates[1] || + rate->idx == mp->cck_rates[2] || + rate->idx == mp->cck_rates[3]; } static void @@ -390,7 +446,6 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; bool last; - int group; int i; if (!msp->is_ht) @@ -419,13 +474,12 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - last = !minstrel_ht_txstat_valid(&ar[0]); + last = !minstrel_ht_txstat_valid(mp, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || - !minstrel_ht_txstat_valid(&ar[i + 1]); + !minstrel_ht_txstat_valid(mp, &ar[i + 1]); - group = minstrel_ht_get_group_idx(&ar[i]); - rate = &mi->groups[group].rates[ar[i].idx % 8]; + rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; @@ -451,7 +505,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { minstrel_ht_update_stats(mp, mi); - if (!(info->flags & IEEE80211_TX_CTL_AMPDU)) + if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && + mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) minstrel_aggr_check(sta, skb); } } @@ -467,6 +522,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + unsigned int overhead = 0, overhead_rtscts = 0; mr = minstrel_get_ratestats(mi, index); if (mr->probability < MINSTREL_FRAC(1, 10)) { @@ -488,9 +544,14 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); + if (index / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) { + overhead = mi->overhead; + overhead_rtscts = mi->overhead_rtscts; + } + /* Total TX time for data and Contention after first 2 tries */ - tx_time = ctime + 2 * (mi->overhead + tx_time_data); - tx_time_rtscts = ctime + 2 * (mi->overhead_rtscts + tx_time_data); + tx_time = ctime + 2 * (overhead + tx_time_data); + tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { @@ -499,8 +560,8 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ - tx_time += ctime + mi->overhead + tx_time_data; - tx_time_rtscts += ctime + mi->overhead_rtscts + tx_time_data; + tx_time += ctime + overhead + tx_time_data; + tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) mr->retry_count_rtscts++; @@ -530,9 +591,16 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, else rate->count = mr->retry_count; - rate->flags = IEEE80211_TX_RC_MCS | group->flags; + rate->flags = 0; if (rtscts) rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + + if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + rate->idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; + return; + } + + rate->flags |= IEEE80211_TX_RC_MCS | group->flags; rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; } @@ -596,6 +664,22 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } static void +minstrel_ht_check_cck_shortpreamble(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi, bool val) +{ + u8 supported = mi->groups[MINSTREL_CCK_GROUP].supported; + + if (!supported || !mi->cck_supported_short) + return; + + if (supported & (mi->cck_supported_short << (val * 4))) + return; + + supported ^= mi->cck_supported_short | (mi->cck_supported_short << 4); + mi->groups[MINSTREL_CCK_GROUP].supported = supported; +} + +static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { @@ -614,6 +698,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); info->flags |= mi->tx_flags; + minstrel_ht_check_cck_shortpreamble(mp, mi, txrc->short_preamble); /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && @@ -687,6 +772,30 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, } static void +minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + if (sband->band != IEEE80211_BAND_2GHZ) + return; + + mi->cck_supported = 0; + mi->cck_supported_short = 0; + for (i = 0; i < 4; i++) { + if (!rate_supported(sta, sband->band, mp->cck_rates[i])) + continue; + + mi->cck_supported |= BIT(i); + if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) + mi->cck_supported_short |= BIT(i); + } + + mi->groups[MINSTREL_CCK_GROUP].supported = mi->cck_supported; +} + +static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { @@ -699,14 +808,13 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, int ack_dur; int stbc; int i; - unsigned int smps; /* fall back to the old minstrel for legacy stations */ if (!sta->ht_cap.ht_supported) goto use_legacy; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != - MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1); msp->is_ht = true; memset(mi, 0, sizeof(*mi)); @@ -735,28 +843,29 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; - smps = (sta_cap & IEEE80211_HT_CAP_SM_PS) >> - IEEE80211_HT_CAP_SM_PS_SHIFT; - for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { - u16 req = 0; - mi->groups[i].supported = 0; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SGI_40; - else - req |= IEEE80211_HT_CAP_SGI_20; + if (i == MINSTREL_CCK_GROUP) { + minstrel_ht_update_cck(mp, mi, sband, sta); + continue; } - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_40)) + continue; + } else { + if (!(sta_cap & IEEE80211_HT_CAP_SGI_20)) + continue; + } + } - if ((sta_cap & req) != req) + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH && + sta->bandwidth < IEEE80211_STA_RX_BW_40) continue; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ - if (smps == WLAN_HT_CAP_SM_PS_STATIC && + if (sta->smps_mode == IEEE80211_SMPS_STATIC && minstrel_mcs_groups[i].streams > 1) continue; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 462d2b227ed..302dbd52180 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -107,8 +107,11 @@ struct minstrel_ht_sta { /* current MCS group to be sampled */ u8 sample_group; + u8 cck_supported; + u8 cck_supported_short; + /* MCS rate group info and statistics */ - struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS + 1]; }; struct minstrel_ht_sta_priv { diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index e788f76a1df..df44a5ad827 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -15,13 +15,76 @@ #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" +static char * +minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) +{ + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; + const struct mcs_group *mg; + unsigned int j, tp, prob, eprob; + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + return p; + + mg = &minstrel_mcs_groups[i]; + if (mg->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (mg->flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + static const int bitrates[4] = { 10, 20, 55, 110 }; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + if (i == max_mcs) + p += sprintf(p, "CCK/%cP ", j < 4 ? 'L' : 'S'); + else + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; + + if (i == max_mcs) { + int r = bitrates[j % 4]; + p += sprintf(p, " %2u.%1uM", r / 10, r % 10); + } else { + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * + MCS_GROUP_RATES + j); + } + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u %3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->retry_count, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + + return p; +} + static int minstrel_ht_stats_open(struct inode *inode, struct file *file) { struct minstrel_ht_sta_priv *msp = inode->i_private; struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; - unsigned int i, j, tp, prob, eprob; + unsigned int i; + unsigned int max_mcs = MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; char *p; int ret; @@ -38,50 +101,13 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); - for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { - char htmode = '2'; - char gimode = 'L'; - - if (!mi->groups[i].supported) - continue; - - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - htmode = '4'; - if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) - gimode = 'S'; + p += sprintf(p, "type rate throughput ewma prob this prob " + "retry this succ/attempt success attempts\n"); - for (j = 0; j < MCS_GROUP_RATES; j++) { - struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; - int idx = i * MCS_GROUP_RATES + j; + p = minstrel_ht_stats_dump(mi, max_mcs, p); + for (i = 0; i < max_mcs; i++) + p = minstrel_ht_stats_dump(mi, i, p); - if (!(mi->groups[i].supported & BIT(j))) - continue; - - p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); - - *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; - *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; - *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; - p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * - MCS_GROUP_RATES + j); - - tp = mr->cur_tp / 10; - prob = MINSTREL_TRUNC(mr->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mr->probability * 1000); - - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u(%3u) %8llu %8llu\n", - tp / 10, tp % 10, - eprob / 10, eprob % 10, - prob / 10, prob % 10, - mr->last_success, - mr->last_attempts, - (unsigned long long)mr->succ_hist, - (unsigned long long)mr->att_hist); - } - } p += sprintf(p, "\nTotal packet count:: ideal %d " "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 580704eba8b..bb73ed2d20b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -668,9 +668,9 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - int index) + int index, + struct sk_buff_head *frames) { - struct ieee80211_local *local = sdata->local; struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; struct ieee80211_rx_status *status; @@ -684,7 +684,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, tid_agg_rx->reorder_buf[index] = NULL; status = IEEE80211_SKB_RXCB(skb); status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); no_frame: tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); @@ -692,7 +692,8 @@ no_frame: static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - u16 head_seq_num) + u16 head_seq_num, + struct sk_buff_head *frames) { int index; @@ -701,7 +702,8 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); } } @@ -717,7 +719,8 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, - struct tid_ampdu_rx *tid_agg_rx) + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff_head *frames) { int index, j; @@ -746,7 +749,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, ht_dbg_ratelimited(sdata, "release an RX reorder frame due to timeout on earlier frames\n"); - ieee80211_release_reorder_frame(sdata, tid_agg_rx, j); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, j, + frames); /* * Increment the head seq# also for the skipped slots. @@ -756,7 +760,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { - ieee80211_release_reorder_frame(sdata, tid_agg_rx, index); + ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, + frames); index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; } @@ -788,7 +793,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, */ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb) + struct sk_buff *skb, + struct sk_buff_head *frames) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 sc = le16_to_cpu(hdr->seq_ctrl); @@ -816,7 +822,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, - head_seq_num); + head_seq_num, frames); } /* Now the new frame is always in the range of the reordering buffer */ @@ -846,7 +852,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata tid_agg_rx->reorder_buf[index] = skb; tid_agg_rx->reorder_time[index] = jiffies; tid_agg_rx->stored_mpdu_num++; - ieee80211_sta_reorder_release(sdata, tid_agg_rx); + ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); out: spin_unlock(&tid_agg_rx->reorder_lock); @@ -857,7 +863,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns * true if the MPDU was buffered, false if it should be processed. */ -static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) +static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; @@ -922,11 +929,12 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) * sure that we cannot get to it any more before doing * anything with it. */ - if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb)) + if (ieee80211_sta_manage_reorder_buf(rx->sdata, tid_agg_rx, skb, + frames)) return; dont_reorder: - skb_queue_tail(&local->rx_skb_queue, skb); + __skb_queue_tail(frames, skb); } static ieee80211_rx_result debug_noinline @@ -1452,6 +1460,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } + /* mesh power save support */ + if (ieee80211_vif_is_mesh(&rx->sdata->vif)) + ieee80211_mps_rx_h_sta_process(sta, hdr); + /* * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. @@ -2015,7 +2027,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr)) return RX_DROP_MONITOR; if (!ieee80211_is_data(hdr->frame_control) || @@ -2042,9 +2054,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } rcu_read_lock(); - mppath = mpp_path_lookup(proxied_addr, sdata); + mppath = mpp_path_lookup(sdata, proxied_addr); if (!mppath) { - mpp_path_add(proxied_addr, mpp_addr, sdata); + mpp_path_add(sdata, proxied_addr, mpp_addr); } else { spin_lock_bh(&mppath->state_lock); if (!ether_addr_equal(mppath->mpp, mpp_addr)) @@ -2090,12 +2102,15 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(fwd_hdr->addr1)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); - } else if (!mesh_nexthop_lookup(fwd_skb, sdata)) { + /* update power mode indication when forwarding */ + ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr); + } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) { + /* mesh power mode flags updated in mesh_nexthop_lookup */ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); } else { /* unable to resolve next hop */ - mesh_path_error_tx(ifmsh->mshcfg.element_ttl, fwd_hdr->addr3, - 0, reason, fwd_hdr->addr2, sdata); + mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl, + fwd_hdr->addr3, 0, reason, fwd_hdr->addr2); IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route); kfree_skb(fwd_skb); return RX_DROP_MONITOR; @@ -2177,7 +2192,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) +ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; @@ -2216,7 +2231,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) spin_lock(&tid_agg_rx->reorder_lock); /* release stored frames up to start of BAR */ ieee80211_release_reorder_frames(rx->sdata, tid_agg_rx, - start_seq_num); + start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); kfree_skb(skb); @@ -2353,38 +2368,34 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - /* verify action & smps_control are present */ + /* verify action & smps_control/chanwidth are present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; switch (mgmt->u.action.u.ht_smps.action) { case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; - u8 smps; + enum ieee80211_smps_mode smps_mode; /* convert to HT capability */ switch (mgmt->u.action.u.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: - smps = WLAN_HT_CAP_SM_PS_DISABLED; + smps_mode = IEEE80211_SMPS_OFF; break; case WLAN_HT_SMPS_CONTROL_STATIC: - smps = WLAN_HT_CAP_SM_PS_STATIC; + smps_mode = IEEE80211_SMPS_STATIC; break; case WLAN_HT_SMPS_CONTROL_DYNAMIC: - smps = WLAN_HT_CAP_SM_PS_DYNAMIC; + smps_mode = IEEE80211_SMPS_DYNAMIC; break; default: goto invalid; } - smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; /* if no change do nothing */ - if ((rx->sta->sta.ht_cap.cap & - IEEE80211_HT_CAP_SM_PS) == smps) + if (rx->sta->sta.smps_mode == smps_mode) goto handled; - - rx->sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; - rx->sta->sta.ht_cap.cap |= smps; + rx->sta->sta.smps_mode = smps_mode; sband = rx->local->hw.wiphy->bands[status->band]; @@ -2392,11 +2403,66 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) IEEE80211_RC_SMPS_CHANGED); goto handled; } + case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { + struct ieee80211_supported_band *sband; + u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + enum ieee80211_sta_rx_bandwidth new_bw; + + /* If it doesn't support 40 MHz it can't change ... */ + if (!(rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + goto handled; + + if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) + new_bw = IEEE80211_STA_RX_BW_20; + else + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + + if (rx->sta->sta.bandwidth == new_bw) + goto handled; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_BW_CHANGED); + goto handled; + } default: goto invalid; } break; + case WLAN_CATEGORY_VHT: + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + + switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + u8 opmode; + + /* verify opmode is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + ieee80211_vht_handle_opmode(rx->sdata, rx->sta, + opmode, status->band, + false); + goto handled; + } + default: + break; + } + break; case WLAN_CATEGORY_BACK: if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -2648,8 +2714,9 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; break; case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): - /* process only for ibss */ - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + /* process only for ibss and mesh */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return RX_DROP_MONITOR; break; default: @@ -2772,7 +2839,8 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, } } -static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) +static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) { ieee80211_rx_result res = RX_DROP_MONITOR; struct sk_buff *skb; @@ -2784,15 +2852,9 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) goto rxh_next; \ } while (0); - spin_lock(&rx->local->rx_skb_queue.lock); - if (rx->local->running_rx_handler) - goto unlock; - - rx->local->running_rx_handler = true; - - while ((skb = __skb_dequeue(&rx->local->rx_skb_queue))) { - spin_unlock(&rx->local->rx_skb_queue.lock); + spin_lock_bh(&rx->local->rx_path_lock); + while ((skb = __skb_dequeue(frames))) { /* * all the other fields are valid across frames * that belong to an aMPDU since they are on the @@ -2813,7 +2875,12 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) #endif CALL_RXH(ieee80211_rx_h_amsdu) CALL_RXH(ieee80211_rx_h_data) - CALL_RXH(ieee80211_rx_h_ctrl); + + /* special treatment -- needs the queue */ + res = ieee80211_rx_h_ctrl(rx, frames); + if (res != RX_CONTINUE) + goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) CALL_RXH(ieee80211_rx_h_userspace_mgmt) @@ -2822,20 +2889,20 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) rxh_next: ieee80211_rx_handlers_result(rx, res); - spin_lock(&rx->local->rx_skb_queue.lock); + #undef CALL_RXH } - rx->local->running_rx_handler = false; - - unlock: - spin_unlock(&rx->local->rx_skb_queue.lock); + spin_unlock_bh(&rx->local->rx_path_lock); } static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) { + struct sk_buff_head reorder_release; ieee80211_rx_result res = RX_DROP_MONITOR; + __skb_queue_head_init(&reorder_release); + #define CALL_RXH(rxh) \ do { \ res = rxh(rx); \ @@ -2845,9 +2912,9 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_check) - ieee80211_rx_reorder_ampdu(rx); + ieee80211_rx_reorder_ampdu(rx, &reorder_release); - ieee80211_rx_handlers(rx); + ieee80211_rx_handlers(rx, &reorder_release); return; rxh_next: @@ -2862,6 +2929,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) */ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { + struct sk_buff_head frames; struct ieee80211_rx_data rx = { .sta = sta, .sdata = sta->sdata, @@ -2877,11 +2945,13 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) if (!tid_agg_rx) return; + __skb_queue_head_init(&frames); + spin_lock(&tid_agg_rx->reorder_lock); - ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx); + ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); - ieee80211_rx_handlers(&rx); + ieee80211_rx_handlers(&rx, &frames); } /* main receive path */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bf82e69d060..43a45cf00e0 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -27,22 +27,15 @@ #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) -#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 8) - -static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) -{ - struct ieee80211_bss *bss = (void *)cbss->priv; - - kfree(bss_mesh_id(bss)); - kfree(bss_mesh_cfg(bss)); -} +#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 9) void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { if (!bss) return; - cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); + cfg80211_put_bss(local->hw.wiphy, + container_of((void *)bss, struct cfg80211_bss, priv)); } static bool is_uapsd_supported(struct ieee802_11_elems *elems) @@ -65,12 +58,11 @@ static bool is_uapsd_supported(struct ieee802_11_elems *elems) struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, - struct ieee80211_mgmt *mgmt, - size_t len, + struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - struct ieee80211_channel *channel, - bool beacon) + struct ieee80211_channel *channel) { + bool beacon = ieee80211_is_beacon(mgmt->frame_control); struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; @@ -86,10 +78,12 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (!cbss) return NULL; - cbss->free_priv = ieee80211_rx_bss_free; bss = (void *)cbss->priv; - bss->device_ts = rx_status->device_timestamp; + if (beacon) + bss->device_ts_beacon = rx_status->device_timestamp; + else + bss->device_ts_presp = rx_status->device_timestamp; if (elems->parse_error) { if (beacon) @@ -147,9 +141,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } - if (!beacon) - bss->last_probe_resp = jiffies; - return bss; } @@ -203,7 +194,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, &elems, - channel, beacon); + channel); if (bss) ieee80211_rx_bss_put(local, bss); } @@ -343,6 +334,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); + /* ensure nullfunc is transmitted before leaving operating channel */ + drv_flush(local, false); + ieee80211_configure_filter(local); /* We need to set power level at maximum rate for scanning. */ @@ -357,6 +351,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { + if (local->radar_detect_enabled) + return false; + if (!list_empty(&local->roc_list)) return false; @@ -391,6 +388,11 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, int i; struct ieee80211_sub_if_data *sdata; enum ieee80211_band band = local->hw.conf.channel->band; + u32 tx_flags; + + tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + if (local->scan_req->no_cck) + tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE; sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); @@ -402,8 +404,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, local->scan_req->rates[band], false, - local->scan_req->no_cck, - local->hw.conf.channel, true); + tx_flags, local->hw.conf.channel, true); /* * After sending probe requests, wait for probe responses @@ -547,8 +548,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, bool associated = false; bool tx_empty = true; bool bad_latency; - bool listen_int_exceeded; - unsigned long min_beacon_int = 0; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *next_chan; enum mac80211_scan_state next_scan_state; @@ -567,11 +566,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, if (sdata->u.mgd.associated) { associated = true; - if (sdata->vif.bss_conf.beacon_int < - min_beacon_int || min_beacon_int == 0) - min_beacon_int = - sdata->vif.bss_conf.beacon_int; - if (!qdisc_all_tx_empty(sdata->dev)) { tx_empty = false; break; @@ -588,34 +582,19 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * see if we can scan another channel without interfering * with the current traffic situation. * - * Since we don't know if the AP has pending frames for us - * we can only check for our tx queues and use the current - * pm_qos requirements for rx. Hence, if no tx traffic occurs - * at all we will scan as many channels in a row as the pm_qos - * latency allows us to. Additionally we also check for the - * currently negotiated listen interval to prevent losing - * frames unnecessarily. - * - * Otherwise switch back to the operating channel. + * Keep good latency, do not stay off-channel more than 125 ms. */ bad_latency = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY))); - - listen_int_exceeded = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(min_beacon_int * 1024) * - local->hw.conf.listen_interval); + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + HZ / 8); if (associated && !tx_empty) { if (local->scan_req->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) next_scan_state = SCAN_ABORT; else next_scan_state = SCAN_SUSPEND; - } else if (associated && (bad_latency || listen_int_exceeded)) { + } else if (associated && bad_latency) { next_scan_state = SCAN_SUSPEND; } else { next_scan_state = SCAN_SET_CHANNEL; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ca9fde19818..a79ce820cb5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -104,12 +104,24 @@ static void cleanup_single_sta(struct sta_info *sta) * neither mac80211 nor the driver can reference this * sta struct any more except by still existing timers * associated with this station that we clean up below. + * + * Note though that this still uses the sdata and even + * calls the driver in AP and mesh mode, so interfaces + * of those types mush use call sta_info_flush_cleanup() + * (typically via sta_info_flush()) before deconfiguring + * the driver. + * + * In station mode, nothing happens here so it doesn't + * have to (and doesn't) do that, this is intentional to + * speed up roaming. */ if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -125,13 +137,8 @@ static void cleanup_single_sta(struct sta_info *sta) ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) { - mesh_accept_plinks_update(sdata); - mesh_plink_deactivate(sta); - del_timer_sync(&sta->plink_timer); - } -#endif + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_unblock_wk); @@ -368,12 +375,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); - sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); + sta->sta.smps_mode = IEEE80211_SMPS_OFF; -#ifdef CONFIG_MAC80211_MESH - sta->plink_state = NL80211_PLINK_LISTEN; - init_timer(&sta->plink_timer); -#endif + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; } @@ -569,7 +573,6 @@ void sta_info_recalc_tim(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ps_data *ps; - unsigned long flags; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; @@ -582,6 +585,12 @@ void sta_info_recalc_tim(struct sta_info *sta) ps = &sta->sdata->bss->ps; id = sta->sta.aid; +#ifdef CONFIG_MAC80211_MESH + } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + ps = &sta->sdata->u.mesh.ps; + /* TIM map only for PLID <= IEEE80211_MAX_AID */ + id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; +#endif } else { return; } @@ -620,7 +629,7 @@ void sta_info_recalc_tim(struct sta_info *sta) } done: - spin_lock_irqsave(&local->tim_lock, flags); + spin_lock_bh(&local->tim_lock); if (indicate_tim) __bss_tim_set(ps->tim, id); @@ -633,7 +642,7 @@ void sta_info_recalc_tim(struct sta_info *sta) local->tim_in_locked_section = false; } - spin_unlock_irqrestore(&local->tim_lock, flags); + spin_unlock_bh(&local->tim_lock); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) @@ -740,8 +749,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, bool have_buffered = false; int ac; - /* This is only necessary for stations on BSS interfaces */ - if (!sta->sdata->bss) + /* This is only necessary for stations on BSS/MBSS interfaces */ + if (!sta->sdata->bss && + !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) @@ -774,7 +784,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - ieee80211_sta_tear_down_BA_sessions(sta, false); + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); if (ret) @@ -885,20 +895,12 @@ void sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - sta_info_flush(local, NULL); } -/** - * sta_info_flush - flush matching STA entries from the STA table - * - * Returns the number of removed STA entries. - * - * @local: local interface data - * @sdata: matching rule for the net device (sta->dev) or %NULL to match all STAs - */ -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; int ret = 0; @@ -906,30 +908,22 @@ int sta_info_flush(struct ieee80211_local *local, mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { + if (sdata == sta->sdata) { WARN_ON(__sta_info_destroy(sta)); ret++; } } mutex_unlock(&local->sta_mtx); - rcu_barrier(); - - if (sdata) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } else { - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); - } - mutex_unlock(&local->iflist_mtx); - } - return ret; } +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_cleanup_sdata_stas(sdata); + cancel_work_sync(&sdata->cleanup_stations_wk); +} + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { @@ -945,6 +939,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); + + if (ieee80211_vif_is_mesh(&sdata->vif) && + test_sta_flag(sta, WLAN_STA_PS_STA)) + atomic_dec(&sdata->u.mesh.ps.num_sta_ps); + WARN_ON(__sta_info_destroy(sta)); } } @@ -1003,6 +1002,8 @@ static void clear_sta_ps_flags(void *_sta) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -1120,6 +1121,8 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + skb->dev = sdata->dev; + rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 37c1889afd3..4947341a2a8 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -56,6 +56,8 @@ * @WLAN_STA_INSERTED: This station is inserted into the hash table. * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. + * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. + * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_INSERTED, WLAN_STA_RATE_CONTROL, WLAN_STA_TOFFSET_KNOWN, + WLAN_STA_MPSP_OWNER, + WLAN_STA_MPSP_RECIPIENT, }; #define ADDBA_RESP_INTERVAL HZ @@ -92,6 +96,13 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 +enum ieee80211_agg_stop_reason { + AGG_STOP_DECLINED, + AGG_STOP_LOCAL_REQUEST, + AGG_STOP_PEER_REQUEST, + AGG_STOP_DESTROY_STA, +}; + /** * struct tid_ampdu_tx - TID aggregation information (Tx). * @@ -274,7 +285,9 @@ struct sta_ampdu_mlme { * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift - * @ch_width: peer's channel width + * @local_pm: local link-specific power save mode + * @peer_pm: peer-specific power save mode towards local STA + * @nonpeer_pm: STA power save mode towards non-peer neighbors * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver @@ -282,8 +295,9 @@ struct sta_ampdu_mlme { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered - * @supports_40mhz: tracks whether the station advertised 40 MHz support - * as we overwrite its HT parameters with the currently used value + * @rcu_head: RCU head used for freeing this station struct + * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, + * taken from HT/VHT capabilities or VHT operating mode notification */ struct sta_info { /* General information, mostly static */ @@ -371,7 +385,10 @@ struct sta_info { struct timer_list plink_timer; s64 t_offset; s64 t_offset_setpoint; - enum nl80211_chan_width ch_width; + /* mesh power save */ + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; #endif #ifdef CONFIG_MAC80211_DEBUGFS @@ -381,11 +398,11 @@ struct sta_info { } debugfs; #endif + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; + unsigned int lost_packets; unsigned int beacon_loss_count; - bool supports_40mhz; - /* keep last! */ struct ieee80211_sta sta; }; @@ -548,8 +565,39 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); +int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush_cleanup - flush the sta_info cleanup queue + * @sdata: the interface + * + * Flushes the sta_info cleanup queue for a given interface; + * this is necessary before the interface is removed or, for + * AP/mesh interfaces, before it is deconfigured. + * + * Note an rcu_barrier() must precede the function, after all + * stations have been flushed/removed to ensure the call_rcu() + * calls that add stations to the cleanup queue have completed. + */ +void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); + +/** + * sta_info_flush - flush matching STA entries from the STA table + * + * Returns the number of removed STA entries. + * + * @sdata: sdata to remove all stations from + */ +static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) +{ + int ret = sta_info_flush_defer(sdata); + + rcu_barrier(); + sta_info_flush_cleanup(sdata); + + return ret; +} + void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 07d99578a2b..43439203f4e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -335,7 +335,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (dropped) acked = false; - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { + if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_INTFL_MLME_CONN_TX)) { struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_sub_if_data *iter_sdata; u64 cookie = (unsigned long)skb; @@ -357,10 +358,13 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, sdata = rcu_dereference(local->p2p_sdata); } - if (!sdata) + if (!sdata) { skb->dev = NULL; - else if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + } else if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) { + ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, + acked); + } else if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); } else { @@ -468,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) && + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), + sta, true, acked); + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; @@ -502,11 +513,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) IEEE80211_BAR_CTRL_TID_INFO_MASK) >> IEEE80211_BAR_CTRL_TID_INFO_SHIFT; - if (local->hw.flags & - IEEE80211_HW_TEARDOWN_AGGR_ON_BAR_FAIL) - ieee80211_stop_tx_ba_session(&sta->sta, tid); - else - ieee80211_set_bar_pending(sta, tid, ssn); + ieee80211_set_bar_pending(sta, tid, ssn); } } diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 57e14d59e12..3ed801d90f1 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -177,12 +177,11 @@ void ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, struct ieee80211_key *key = (struct ieee80211_key *) container_of(keyconf, struct ieee80211_key, conf); struct tkip_ctx *ctx = &key->u.tkip.tx; - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); memcpy(p1k, ctx->p1k, sizeof(ctx->p1k)); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); @@ -208,12 +207,11 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); u32 iv32 = get_unaligned_le32(&data[4]); u16 iv16 = data[2] | (data[0] << 8); - unsigned long flags; - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock_bh(&key->u.tkip.txlock); ieee80211_compute_tkip_p1k(key, iv32); tkip_mixing_phase2(tk, ctx, iv16, p2k); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock_bh(&key->u.tkip.txlock); } EXPORT_SYMBOL(ieee80211_get_tkip_p2k); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a8270b441a6..3d7cd2a0582 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,21 +28,27 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANCTX_ENTRY __field(u32, control_freq) \ +#define CHANDEF_ENTRY __field(u32, control_freq) \ __field(u32, chan_width) \ __field(u32, center_freq1) \ - __field(u32, center_freq2) \ + __field(u32, center_freq2) +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c)->chan->center_freq; \ + __entry->chan_width = (c)->width; \ + __entry->center_freq1 = (c)->center_freq1; \ + __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" +#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ + __entry->center_freq1, __entry->center_freq2 + +#define CHANCTX_ENTRY CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN __entry->control_freq = ctx->conf.def.chan->center_freq;\ - __entry->chan_width = ctx->conf.def.width; \ - __entry->center_freq1 = ctx->conf.def.center_freq1; \ - __entry->center_freq2 = ctx->conf.def.center_freq2; \ +#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT " control:%d MHz width:%d center: %d/%d MHz chains:%d/%d" -#define CHANCTX_PR_ARG __entry->control_freq, __entry->chan_width, \ - __entry->center_freq1, __entry->center_freq2, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -334,6 +340,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(u16, assoc_cap) __field(u64, sync_tsf) __field(u32, sync_device_ts) + __field(u8, sync_dtim_count) __field(u32, basic_rates) __array(int, mcast_rate, IEEE80211_NUM_BANDS) __field(u16, ht_operation_mode) @@ -341,8 +348,11 @@ TRACE_EVENT(drv_bss_info_changed, __field(s32, cqm_rssi_hyst); __field(u32, channel_width); __field(u32, channel_cfreq1); - __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt); - __field(bool, arp_filter_enabled); + __dynamic_array(u32, arp_addr_list, + info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt); + __field(int, arp_addr_cnt); __field(bool, qos); __field(bool, idle); __field(bool, ps); @@ -370,6 +380,7 @@ TRACE_EVENT(drv_bss_info_changed, __entry->assoc_cap = info->assoc_capability; __entry->sync_tsf = info->sync_tsf; __entry->sync_device_ts = info->sync_device_ts; + __entry->sync_dtim_count = info->sync_dtim_count; __entry->basic_rates = info->basic_rates; memcpy(__entry->mcast_rate, info->mcast_rate, sizeof(__entry->mcast_rate)); @@ -378,9 +389,11 @@ TRACE_EVENT(drv_bss_info_changed, __entry->cqm_rssi_hyst = info->cqm_rssi_hyst; __entry->channel_width = info->chandef.width; __entry->channel_cfreq1 = info->chandef.center_freq1; + __entry->arp_addr_cnt = info->arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list, - sizeof(u32) * info->arp_addr_cnt); - __entry->arp_filter_enabled = info->arp_filter_enabled; + sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? + IEEE80211_BSS_ARP_ADDR_LIST_LEN : + info->arp_addr_cnt)); __entry->qos = info->qos; __entry->idle = info->idle; __entry->ps = info->ps; @@ -466,7 +479,7 @@ TRACE_EVENT(drv_set_tim, TP_printk( LOCAL_PR_FMT STA_PR_FMT " set:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set + LOCAL_PR_ARG, STA_PR_ARG, __entry->set ) ); @@ -1178,23 +1191,26 @@ TRACE_EVENT(drv_set_rekey_data, TRACE_EVENT(drv_rssi_callback, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, enum ieee80211_rssi_event rssi_event), - TP_ARGS(local, rssi_event), + TP_ARGS(local, sdata, rssi_event), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u32, rssi_event) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->rssi_event = rssi_event; ), TP_printk( - LOCAL_PR_FMT " rssi_event:%d", - LOCAL_PR_ARG, __entry->rssi_event + LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event ) ); @@ -1426,6 +1442,14 @@ DEFINE_EVENT(local_only_evt, drv_restart_complete, TP_ARGS(local) ); +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); +#endif + /* * Tracing for API calls that drivers call. */ @@ -1660,7 +1684,7 @@ TRACE_EVENT(api_sta_block_awake, TP_printk( LOCAL_PR_FMT STA_PR_FMT " block:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->block + LOCAL_PR_ARG, STA_PR_ARG, __entry->block ) ); @@ -1758,7 +1782,7 @@ TRACE_EVENT(api_eosp, TP_printk( LOCAL_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, STA_PR_FMT + LOCAL_PR_ARG, STA_PR_ARG ) ); @@ -1815,6 +1839,48 @@ TRACE_EVENT(stop_queue, ) ); +TRACE_EVENT(drv_set_default_unicast_key, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int key_idx), + + TP_ARGS(local, sdata, key_idx), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, key_idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->key_idx = key_idx; + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx) +); + +TRACE_EVENT(api_radar_detected, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT " radar detected", + LOCAL_PR_ARG + ) +); + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 467c1d1b66f..5b9602b6240 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -329,6 +329,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->u.ap.ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else continue; @@ -372,18 +374,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) /* * broadcast/multicast frame * - * If any of the associated stations is in power save mode, + * If any of the associated/peer stations is in power save mode, * the frame is buffered to be sent after DTIM beacon frame. * This is done either by the hardware or us. */ - /* powersaving STAs currently only in AP/VLAN mode */ + /* powersaving STAs currently only in AP/VLAN/mesh mode */ if (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (!tx->sdata->bss) return TX_CONTINUE; ps = &tx->sdata->bss->ps; + } else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) { + ps = &tx->sdata->u.mesh.ps; } else { return TX_CONTINUE; } @@ -594,7 +598,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; } - if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED)) + if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED && + !ieee80211_is_deauth(hdr->frame_control))) return TX_DROP; if (!skip_hw && tx->key && @@ -1225,6 +1230,21 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { + if (unlikely(info->flags & + IEEE80211_TX_INTFL_OFFCHAN_TX_OK && + local->queue_stop_reasons[q] & + ~BIT(IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL))) { + /* + * Drop off-channel frames if queues are stopped + * for any reason other than off-channel + * operation. Never queue them. + */ + spin_unlock_irqrestore( + &local->queue_stop_reason_lock, flags); + ieee80211_purge_tx_queue(&local->hw, skbs); + return true; + } + /* * Since queue is stopped, queue up frames for later * transmission from the tx-pending tasklet when the @@ -1472,12 +1492,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1) && - mesh_nexthop_resolve(skb, sdata)) { - /* skb queued: don't free */ - return; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (ieee80211_is_data(hdr->frame_control) && + is_unicast_ether_addr(hdr->addr1)) { + if (mesh_nexthop_resolve(sdata, skb)) + return; /* skb queued: don't free */ + } else { + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); + } } ieee80211_set_qos_hdr(sdata, skb); @@ -1787,16 +1809,16 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; /* fall through */ case NL80211_IFTYPE_AP: + if (sdata->vif.type == NL80211_IFTYPE_AP) + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) + goto fail_rcu; fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; - if (sdata->vif.type == NL80211_IFTYPE_AP) - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) - goto fail_rcu; band = chanctx_conf->def.chan->band; break; case NL80211_IFTYPE_WDS: @@ -1822,9 +1844,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } if (!is_multicast_ether_addr(skb->data)) { - mpath = mesh_path_lookup(skb->data, sdata); + mpath = mesh_path_lookup(sdata, skb->data); if (!mpath) - mppath = mpp_path_lookup(skb->data, sdata); + mppath = mpp_path_lookup(sdata, skb->data); } /* @@ -1837,8 +1859,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, !(mppath && !ether_addr_equal(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, - sdata, NULL, NULL); + meshhdrlen = ieee80211_new_mesh_header(sdata, &mesh_hdr, + NULL, NULL); } else { /* DS -> MBSS (802.11-2012 13.11.3.3). * For unicast with unknown forwarding information, @@ -1857,18 +1879,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mesh_da, sdata->vif.addr); if (is_multicast_ether_addr(mesh_da)) /* DA TA mSA AE:SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data + ETH_ALEN, - NULL); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, + skb->data + ETH_ALEN, NULL); else /* RA TA mDA mSA AE:DA SA */ - meshhdrlen = - ieee80211_new_mesh_header(&mesh_hdr, - sdata, - skb->data, - skb->data + ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header( + sdata, &mesh_hdr, skb->data, + skb->data + ETH_ALEN); } chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -2264,9 +2282,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, - struct sk_buff *skb) +static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) { u8 *pos, *tim; int aid0 = 0; @@ -2328,6 +2345,29 @@ static void ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } } +static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, + struct ps_data *ps, struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + + /* + * Not very nice, but we want to allow the driver to call + * ieee80211_beacon_get() as a response to the set_tim() + * callback. That, however, is already invoked under the + * sta_lock to guarantee consistent and race-free update + * of the tim bitmap in mac80211 and the driver. + */ + if (local->tim_in_locked_section) { + __ieee80211_beacon_add_tim(sdata, ps, skb); + } else { + spin_lock(&local->tim_lock); + __ieee80211_beacon_add_tim(sdata, ps, skb); + spin_unlock(&local->tim_lock); + } + + return 0; +} + struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 *tim_offset, u16 *tim_length) @@ -2372,22 +2412,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - /* - * Not very nice, but we want to allow the driver to call - * ieee80211_beacon_get() as a response to the set_tim() - * callback. That, however, is already invoked under the - * sta_lock to guarantee consistent and race-free update - * of the tim bitmap in mac80211 and the driver. - */ - if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - } else { - unsigned long flags; - - spin_lock_irqsave(&local->tim_lock, flags); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); - spin_unlock_irqrestore(&local->tim_lock, flags); - } + ieee80211_beacon_add_tim(sdata, &ap->ps, skb); if (tim_offset) *tim_offset = beacon->head_len; @@ -2415,66 +2440,26 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { - struct ieee80211_mgmt *mgmt; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u8 *pos; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + struct beacon_data *bcn = rcu_dereference(ifmsh->beacon); -#ifdef CONFIG_MAC80211_MESH - if (!sdata->u.mesh.mesh_id_len) + if (!bcn) goto out; -#endif if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt( sdata); skb = dev_alloc_skb(local->tx_headroom + - hdr_len + - 2 + /* NULL SSID */ - 2 + 8 + /* supported rates */ - 2 + 3 + /* DS params */ - 2 + (IEEE80211_MAX_SUPP_RATES - 8) + - 2 + sizeof(struct ieee80211_ht_cap) + - 2 + sizeof(struct ieee80211_ht_operation) + - 2 + sdata->u.mesh.mesh_id_len + - 2 + sizeof(struct ieee80211_meshconf_ie) + - sdata->u.mesh.ie_len); + bcn->head_len + + 256 + /* TIM IE */ + bcn->tail_len); if (!skb) goto out; - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); - memset(mgmt, 0, hdr_len); - mgmt->frame_control = - cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); - eth_broadcast_addr(mgmt->da); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.beacon.beacon_int = - cpu_to_le16(sdata->vif.bss_conf.beacon_int); - mgmt->u.beacon.capab_info |= cpu_to_le16( - sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); - - pos = skb_put(skb, 2); - *pos++ = WLAN_EID_SSID; - *pos++ = 0x0; - - band = chanctx_conf->def.chan->band; - - if (ieee80211_add_srates_ie(sdata, skb, true, band) || - mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(sdata, skb, true, band) || - mesh_add_rsn_ie(skb, sdata) || - mesh_add_ht_cap_ie(skb, sdata) || - mesh_add_ht_oper_ie(skb, sdata) || - mesh_add_meshid_ie(skb, sdata) || - mesh_add_meshconf_ie(skb, sdata) || - mesh_add_vendor_ies(skb, sdata)) { - pr_err("o11s: couldn't add ies!\n"); - goto out; - } + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); goto out; @@ -2724,6 +2709,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, goto out; ps = &sdata->u.ap.ps; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + ps = &sdata->u.mesh.ps; } else { goto out; } @@ -2747,6 +2734,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } + sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; dev_kfree_skb_any(skb); @@ -2779,6 +2767,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, skb_set_queue_mapping(skb, ac); skb->priority = tid; + skb->dev = sdata->dev; + /* * The other path calling ieee80211_xmit is from the tasklet, * and while we can handle concurrent transmissions locking diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f11e8c540db..0f38f43ac62 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -739,11 +739,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) { + if (elen >= 5 && pos[3] == 2) { /* OUI Type 2 - WMM IE */ if (pos[4] == 0) { elems->wmm_info = pos; @@ -791,6 +787,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, else elem_parse_failed = true; break; + case WLAN_EID_OPMODE_NOTIF: + if (elen > 0) + elems->opmode_notif = pos; + else + elem_parse_failed = true; + break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; elems->mesh_id_len = elen; @@ -805,6 +807,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->peering = pos; elems->peering_len = elen; break; + case WLAN_EID_MESH_AWAKE_WINDOW: + if (elen >= 2) + elems->awake_window = (void *)pos; + break; case WLAN_EID_PREQ: elems->preq = pos; elems->preq_len = elen; @@ -1029,8 +1035,9 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *da, - const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx) + const u8 *extra, size_t extra_len, const u8 *da, + const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx, + u32 tx_flags) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1063,7 +1070,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, WARN_ON(err); } - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + tx_flags; ieee80211_tx_skb(sdata, skb); } @@ -1277,7 +1285,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed, bool no_cck, + u32 ratemask, bool directed, u32 tx_flags, struct ieee80211_channel *channel, bool scan) { struct sk_buff *skb; @@ -1286,9 +1294,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, ssid, ssid_len, ie, ie_len, directed); if (skb) { - if (no_cck) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_CTL_NO_CCK_RATE; + IEEE80211_SKB_CB(skb)->flags |= tx_flags; if (scan) ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); else @@ -1358,6 +1364,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; struct sta_info *sta; int res, i; + bool reconfig_due_to_wowlan = false; #ifdef CONFIG_PM if (local->suspended) @@ -1377,6 +1384,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * res is 1, which means the driver requested * to go through a regular reset on wakeup. */ + reconfig_due_to_wowlan = true; } #endif /* everything else happens only if HW was up & running */ @@ -1526,11 +1534,20 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_IDLE | BSS_CHANGED_TXPOWER; +#ifdef CONFIG_PM + if (local->resuming && !reconfig_due_to_wowlan) + sdata->vif.bss_conf = sdata->suspend_bss_conf; +#endif + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; + + if (sdata->u.mgd.dtim_period) + changed |= BSS_CHANGED_DTIM_PERIOD; + mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); @@ -1550,9 +1567,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* fall through */ case NL80211_IFTYPE_MESH_POINT: - changed |= BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED; - ieee80211_bss_info_change_notify(sdata, changed); + if (sdata->vif.bss_conf.enable_beacon) { + changed |= BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED; + ieee80211_bss_info_change_notify(sdata, changed); + } break; case NL80211_IFTYPE_WDS: break; @@ -1632,7 +1651,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - ieee80211_sta_tear_down_BA_sessions(sta, true); + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } @@ -1646,10 +1666,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended) { + if (!local->suspended || reconfig_due_to_wowlan) drv_restart_complete(local); + + if (!local->suspended) return 0; - } #ifdef CONFIG_PM /* first set suspended false, then resuming */ @@ -1864,7 +1885,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, - u32 cap) + u32 cap) { __le32 tmp; @@ -1926,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { enum nl80211_channel_type channel_type; @@ -2114,3 +2135,49 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, return ts; } + +void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + cfg80211_cac_event(sdata->dev, + NL80211_RADAR_CAC_ABORTED, + GFP_KERNEL); + } + } + mutex_unlock(&local->iflist_mtx); +} + +void ieee80211_dfs_radar_detected_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, radar_detected_work); + struct cfg80211_chan_def chandef; + + ieee80211_dfs_cac_cancel(local); + + if (local->use_chanctx) + /* currently not handled */ + WARN_ON(1); + else { + cfg80211_chandef_create(&chandef, local->hw.conf.channel, + local->hw.conf.channel_type); + cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); + } +} + +void ieee80211_radar_detected(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_radar_detected(local); + + ieee80211_queue_work(hw, &local->radar_detected_work); +} +EXPORT_SYMBOL(ieee80211_radar_detected); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index f311388aeed..a2c2258bc84 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -10,21 +10,29 @@ #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" +#include "rate.h" -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct ieee80211_sta_vht_cap *vht_cap) +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta) { - if (WARN_ON_ONCE(!vht_cap)) - return; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; memset(vht_cap, 0, sizeof(*vht_cap)); + if (!sta->sta.ht_cap.ht_supported) + return; + if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; + /* A VHT STA must support 40 MHz */ + if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + return; + vht_cap->vht_supported = true; vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); @@ -32,4 +40,156 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* Copy peer MCS info, the driver might need them. */ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + + switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + default: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + } + + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 cap = sta->sta.vht_cap.cap; + enum ieee80211_sta_rx_bandwidth bw; + + if (!sta->sta.vht_cap.vht_supported) { + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + goto check_max; + } + + switch (sdata->vif.bss_conf.chandef.width) { + default: + WARN_ON_ONCE(1); + /* fall through */ + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_160: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80P80: + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } + /* fall through */ + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + } + + check_max: + if (bw > sta->cur_max_bandwidth) + bw = sta->cur_max_bandwidth; + return bw; +} + +void ieee80211_sta_set_rx_nss(struct sta_info *sta) +{ + u8 ht_rx_nss = 0, vht_rx_nss = 0; + + /* if we received a notification already don't overwrite it */ + if (sta->sta.rx_nss) + return; + + if (sta->sta.ht_cap.ht_supported) { + if (sta->sta.ht_cap.mcs.rx_mask[0]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[1]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[2]) + ht_rx_nss++; + if (sta->sta.ht_cap.mcs.rx_mask[3]) + ht_rx_nss++; + /* FIXME: consider rx_highest? */ + } + + if (sta->sta.vht_cap.vht_supported) { + int i; + u16 rx_mcs_map; + + rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map); + + for (i = 7; i >= 0; i--) { + u8 mcs = (rx_mcs_map >> (2 * i)) & 3; + + if (mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + vht_rx_nss = i + 1; + break; + } + } + /* FIXME: consider rx_highest? */ + } + + ht_rx_nss = max(ht_rx_nss, vht_rx_nss); + sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); +} + +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band, bool nss_only) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum ieee80211_sta_rx_bandwidth new_bw; + u32 changed = 0; + u8 nss; + + sband = local->hw.wiphy->bands[band]; + + /* ignore - no support for BF yet */ + if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) + return; + + nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + + if (sta->sta.rx_nss != nss) { + sta->sta.rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + if (nss_only) + goto change; + + switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + } + + new_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_bw != sta->sta.bandwidth) { + sta->sta.bandwidth = new_bw; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + change: + if (changed) + rate_control_rate_update(local, sband, sta, changed); } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 906f00cd6d2..afba19cb6f8 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = ieee80211_vif_is_mesh(&sdata->vif) ? - (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* preserve RSPI and Mesh PS Level bit */ + *p &= ((IEEE80211_QOS_CTL_RSPI | + IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8); + + /* Nulls don't have a mesh header (frame body) */ + if (!ieee80211_is_qos_nullfunc(hdr->frame_control)) + *p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8); + } else { + *p = 0; + } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c175ee866ff..c7c6d644486 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -181,7 +181,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - unsigned long flags; unsigned int hdrlen; int len, tail; u8 *pos; @@ -216,12 +215,12 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Increase IV for the frame */ - spin_lock_irqsave(&key->u.tkip.txlock, flags); + spin_lock(&key->u.tkip.txlock); key->u.tkip.tx.iv16++; if (key->u.tkip.tx.iv16 == 0) key->u.tkip.tx.iv32++; pos = ieee80211_tkip_add_iv(pos, key); - spin_unlock_irqrestore(&key->u.tkip.txlock, flags); + spin_unlock(&key->u.tkip.txlock); /* hwaccel - with software IV */ if (info->control.hw_key) diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 199b92261e9..d20c6d3c247 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -41,7 +41,7 @@ static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) return -EINVAL; *val = skb->data[0]; - skb_pull(skb, 1); + skb_pull(skb, 1); return 0; } @@ -137,16 +137,12 @@ static int mac802154_header_create(struct sk_buff *skb, struct ieee802154_addr dev_addr; struct mac802154_sub_if_data *priv = netdev_priv(dev); int pos = 2; - u8 *head; + u8 head[MAC802154_FRAME_HARD_HEADER_LEN]; u16 fc; if (!daddr) return -EINVAL; - head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); - if (head == NULL) - return -ENOMEM; - head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); @@ -210,7 +206,6 @@ static int mac802154_header_create(struct sk_buff *skb, head[1] = fc >> 8; memcpy(skb_push(skb, pos), head, pos); - kfree(head); return pos; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc..d4dd702574e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It selected by the connlabel match. + config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -805,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -842,6 +857,19 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined labels names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1..a1abf87d43b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -98,9 +99,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2b033..f82b2e606cf 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -88,14 +88,14 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warning("Can't find ip_set type %s\n", name); - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return false; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return true; } @@ -532,7 +532,7 @@ ip_set_nfnl_get(const char *name) ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); for (i = 0; i < ip_set_max; i++) { s = nfnl_set(i); if (s != NULL && STREQ(s->name, name)) { @@ -541,7 +541,7 @@ ip_set_nfnl_get(const char *name) break; } } - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -561,13 +561,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index) if (index > ip_set_max) return IPSET_INVALID_ID; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set) __ip_set_get(set); else index = IPSET_INVALID_ID; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -584,11 +584,11 @@ void ip_set_nfnl_put(ip_set_id_t index) { struct ip_set *set; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set != NULL) __ip_set_put(set); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -1763,10 +1763,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(req_get->set.name, &id); req_get->set.index = id; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_BYINDEX: { @@ -1778,11 +1778,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EINVAL; goto done; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } default: diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5c0b78528e5..b7d4cb475ae 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -234,7 +234,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6283351f4ee..d8f77bacae8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -284,7 +284,7 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, const struct hash_ipport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6a21271c8d5..1da1e955f38 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -294,8 +294,8 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2d5cd4ee30e..f2627226a08 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -388,8 +388,8 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, const struct hash_ipportnet6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && - ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 29e94b981f3..4b677cf6bf7 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -286,7 +286,7 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1, const struct hash_net6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr; } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 45a101439bc..6ba985f1c96 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -471,7 +471,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7ef700de596..af20c0c5ced 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -350,7 +350,7 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1, const struct hash_netport6_elem *ip2, u32 *multi) { - return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto && ip1->cidr == ip2->cidr; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 9713e6e86d4..0b779d7df88 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -605,12 +605,12 @@ int __net_init ip_vs_app_net_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); INIT_LIST_HEAD(&ipvs->app_list); - proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); + proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } void __net_exit ip_vs_app_net_cleanup(struct net *net) { unregister_ip_vs_app(net, NULL /* all */); - proc_net_remove(net, "ip_vs_app"); + remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 30e764ad021..9f00db7e03f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -796,8 +796,7 @@ static void ip_vs_conn_expire(unsigned long data) */ if (likely(atomic_read(&cp->refcnt) == 1)) { /* delete the timer if it is activated by other users */ - if (timer_pending(&cp->timer)) - del_timer(&cp->timer); + del_timer(&cp->timer); /* does anybody control me? */ if (cp->control) @@ -1292,8 +1291,8 @@ int __net_init ip_vs_conn_net_init(struct net *net) atomic_set(&ipvs->conn_count, 0); - proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops); + proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops); return 0; } @@ -1301,8 +1300,8 @@ void __net_exit ip_vs_conn_net_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); - proc_net_remove(net, "ip_vs_conn"); - proc_net_remove(net, "ip_vs_conn_sync"); + remove_proc_entry("ip_vs_conn", net->proc_net); + remove_proc_entry("ip_vs_conn_sync", net->proc_net); } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ec664cbb119..c68198bf912 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3800,10 +3800,10 @@ int __net_init ip_vs_control_net_init(struct net *net) spin_lock_init(&ipvs->tot_stats.lock); - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); + proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); + proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); + proc_create("ip_vs_stats_percpu", 0, net->proc_net, + &ip_vs_stats_percpu_fops); if (ip_vs_control_net_init_sysctl(net)) goto err; @@ -3822,9 +3822,9 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); - proc_net_remove(net, "ip_vs_stats_percpu"); - proc_net_remove(net, "ip_vs_stats"); - proc_net_remove(net, "ip_vs"); + remove_proc_entry("ip_vs_stats_percpu", net->proc_net); + remove_proc_entry("ip_vs_stats", net->proc_net); + remove_proc_entry("ip_vs", net->proc_net); free_percpu(ipvs->tot_stats.cpustats); } diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 7df424e2d10..2d3030ab5b6 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -106,36 +106,26 @@ static void nf_conntrack_acct_fini_sysctl(struct net *net) } #endif -int nf_conntrack_acct_init(struct net *net) +int nf_conntrack_acct_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_acct = nf_ct_acct; + return nf_conntrack_acct_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_acct_pernet_fini(struct net *net) +{ + nf_conntrack_acct_fini_sysctl(net); +} - ret = nf_conntrack_acct_init_sysctl(net); +int nf_conntrack_acct_init(void) +{ + int ret = nf_ct_extend_register(&acct_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); -out_extend_register: + pr_err("nf_conntrack_acct: Unable to register extension\n"); return ret; } -void nf_conntrack_acct_fini(struct net *net) +void nf_conntrack_acct_fini(void) { - nf_conntrack_acct_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&acct_extend); + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index c514fe6033d..dbdaa114926 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -145,6 +145,7 @@ static int amanda_help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -158,8 +159,10 @@ static int amanda_help(struct sk_buff *skb, if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, protoff, off - dataoff, len, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7..c8e001a9c45 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, @@ -1331,18 +1334,42 @@ static int untrack_refs(void) return cnt; } -static void nf_conntrack_cleanup_init_net(void) +void nf_conntrack_cleanup_start(void) +{ + RCU_INIT_POINTER(ip_ct_attach, NULL); +} + +void nf_conntrack_cleanup_end(void) { + RCU_INIT_POINTER(nf_ct_destroy, NULL); while (untrack_refs() > 0) schedule(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif + nf_conntrack_proto_fini(); + nf_conntrack_labels_fini(); + nf_conntrack_helper_fini(); + nf_conntrack_timeout_fini(); + nf_conntrack_ecache_fini(); + nf_conntrack_tstamp_fini(); + nf_conntrack_acct_fini(); + nf_conntrack_expect_fini(); } -static void nf_conntrack_cleanup_net(struct net *net) +/* + * Mishearing the voices in his head, our hero wonders how he's + * supposed to kill the mall. + */ +void nf_conntrack_cleanup_net(struct net *net) { + /* + * This makes sure all current packets have passed through + * netfilter framework. Roll on, two-stage module + * delete... + */ + synchronize_net(); i_see_dead_people: nf_ct_iterate_cleanup(net, kill_all, NULL); nf_ct_release_dying_list(net); @@ -1352,38 +1379,17 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_helper_fini(net); - nf_conntrack_timeout_fini(net); - nf_conntrack_ecache_fini(net); - nf_conntrack_tstamp_fini(net); - nf_conntrack_acct_fini(net); - nf_conntrack_expect_fini(net); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); kfree(net->ct.slabname); free_percpu(net->ct.stat); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(struct net *net) -{ - if (net_eq(net, &init_net)) - RCU_INIT_POINTER(ip_ct_attach, NULL); - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ - synchronize_net(); - nf_conntrack_proto_fini(net); - nf_conntrack_cleanup_net(net); -} - -void nf_conntrack_cleanup_end(void) -{ - RCU_INIT_POINTER(nf_ct_destroy, NULL); - nf_conntrack_cleanup_init_net(); -} - void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) { struct hlist_nulls_head *hash; @@ -1474,7 +1480,7 @@ void nf_ct_untracked_status_or(unsigned long bits) } EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); -static int nf_conntrack_init_init_net(void) +int nf_conntrack_init_start(void) { int max_factor = 8; int ret, cpu; @@ -1501,11 +1507,44 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); + + ret = nf_conntrack_expect_init(); + if (ret < 0) + goto err_expect; + + ret = nf_conntrack_acct_init(); + if (ret < 0) + goto err_acct; + + ret = nf_conntrack_tstamp_init(); + if (ret < 0) + goto err_tstamp; + + ret = nf_conntrack_ecache_init(); + if (ret < 0) + goto err_ecache; + + ret = nf_conntrack_timeout_init(); + if (ret < 0) + goto err_timeout; + + ret = nf_conntrack_helper_init(); + if (ret < 0) + goto err_helper; + + ret = nf_conntrack_labels_init(); + if (ret < 0) + goto err_labels; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) goto err_extend; #endif + ret = nf_conntrack_proto_init(); + if (ret < 0) + goto err_proto; + /* Set up fake conntrack: to never be deleted, not in any hashes */ for_each_possible_cpu(cpu) { struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); @@ -1516,12 +1555,38 @@ static int nf_conntrack_init_init_net(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; +err_proto: #ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_labels_fini(); +err_labels: + nf_conntrack_helper_fini(); +err_helper: + nf_conntrack_timeout_fini(); +err_timeout: + nf_conntrack_ecache_fini(); +err_ecache: + nf_conntrack_tstamp_fini(); +err_tstamp: + nf_conntrack_acct_fini(); +err_acct: + nf_conntrack_expect_fini(); +err_expect: return ret; } +void nf_conntrack_init_end(void) +{ + /* For use by REJECT target */ + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); +} + /* * We need to use special "null" values, not used in hash table */ @@ -1529,7 +1594,7 @@ err_extend: #define DYING_NULLS_VAL ((1<<30)+1) #define TEMPLATE_NULLS_VAL ((1<<30)+2) -static int nf_conntrack_init_net(struct net *net) +int nf_conntrack_init_net(struct net *net) { int ret; @@ -1565,35 +1630,36 @@ static int nf_conntrack_init_net(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_hash; } - ret = nf_conntrack_expect_init(net); + ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_init(net); + ret = nf_conntrack_acct_pernet_init(net); if (ret < 0) goto err_acct; - ret = nf_conntrack_tstamp_init(net); + ret = nf_conntrack_tstamp_pernet_init(net); if (ret < 0) goto err_tstamp; - ret = nf_conntrack_ecache_init(net); + ret = nf_conntrack_ecache_pernet_init(net); if (ret < 0) goto err_ecache; - ret = nf_conntrack_timeout_init(net); - if (ret < 0) - goto err_timeout; - ret = nf_conntrack_helper_init(net); + ret = nf_conntrack_helper_pernet_init(net); if (ret < 0) goto err_helper; + ret = nf_conntrack_proto_pernet_init(net); + if (ret < 0) + goto err_proto; return 0; + +err_proto: + nf_conntrack_helper_pernet_fini(net); err_helper: - nf_conntrack_timeout_fini(net); -err_timeout: - nf_conntrack_ecache_fini(net); + nf_conntrack_ecache_pernet_fini(net); err_ecache: - nf_conntrack_tstamp_fini(net); + nf_conntrack_tstamp_pernet_fini(net); err_tstamp: - nf_conntrack_acct_fini(net); + nf_conntrack_acct_pernet_fini(net); err_acct: - nf_conntrack_expect_fini(net); + nf_conntrack_expect_pernet_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); err_hash: @@ -1610,38 +1676,3 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); EXPORT_SYMBOL_GPL(nf_ct_nat_offset); - -int nf_conntrack_init(struct net *net) -{ - int ret; - - if (net_eq(net, &init_net)) { - ret = nf_conntrack_init_init_net(); - if (ret < 0) - goto out_init_net; - } - ret = nf_conntrack_proto_init(net); - if (ret < 0) - goto out_proto; - ret = nf_conntrack_init_net(net); - if (ret < 0) - goto out_net; - - if (net_eq(net, &init_net)) { - /* For use by REJECT target */ - RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); - RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - } - return 0; - -out_net: - nf_conntrack_proto_fini(net); -out_proto: - if (net_eq(net, &init_net)) - nf_conntrack_cleanup_init_net(); -out_init_net: - return ret; -} diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index faa978f1714..b5d2eb8bf0d 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -233,38 +233,27 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -int nf_conntrack_ecache_init(struct net *net) +int nf_conntrack_ecache_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_events = nf_ct_events; net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + return nf_conntrack_event_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&event_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_event: Unable to register " - "event extension.\n"); - goto out_extend_register; - } - } +void nf_conntrack_ecache_pernet_fini(struct net *net) +{ + nf_conntrack_event_fini_sysctl(net); +} - ret = nf_conntrack_event_init_sysctl(net); +int nf_conntrack_ecache_init(void) +{ + int ret = nf_ct_extend_register(&event_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); -out_extend_register: + pr_err("nf_ct_event: Unable to register event extension.\n"); return ret; } -void nf_conntrack_ecache_fini(struct net *net) +void nf_conntrack_ecache_fini(void) { - nf_conntrack_event_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&event_extend); + nf_ct_extend_unregister(&event_extend); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 527651a53a4..3921e5bc123 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -571,7 +571,8 @@ static int exp_proc_init(struct net *net) #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; - proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, + &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_NF_CONNTRACK_PROCFS */ @@ -581,59 +582,56 @@ static int exp_proc_init(struct net *net) static void exp_proc_remove(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS - proc_net_remove(net, "nf_conntrack_expect"); + remove_proc_entry("nf_conntrack_expect", net->proc_net); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); -int nf_conntrack_expect_init(struct net *net) +int nf_conntrack_expect_pernet_init(struct net *net) { int err = -ENOMEM; - if (net_eq(net, &init_net)) { - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = net->ct.htable_size / 256; - if (!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; - } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - } - net->ct.expect_count = 0; net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); if (net->ct.expect_hash == NULL) goto err1; - if (net_eq(net, &init_net)) { - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; - } - err = exp_proc_init(net); if (err < 0) - goto err3; + goto err2; return 0; - -err3: - if (net_eq(net, &init_net)) - kmem_cache_destroy(nf_ct_expect_cachep); err2: nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(struct net *net) +void nf_conntrack_expect_pernet_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) { - rcu_barrier(); /* Wait for call_rcu() before destroy */ - kmem_cache_destroy(nf_ct_expect_cachep); - } nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } + +int nf_conntrack_expect_init(void) +{ + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL); + if (!nf_ct_expect_cachep) + return -ENOMEM; + return 0; +} + +void nf_conntrack_expect_fini(void) +{ + rcu_barrier(); /* Wait for call_rcu() before destroy */ + kmem_cache_destroy(nf_ct_expect_cachep); +} diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 1ce3befb7c8..62fb8faedb8 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -435,8 +435,8 @@ skip_nl_seq: connection tracking, not packet filtering. However, it is necessary for accurate tracking in this case. */ - pr_debug("conntrack_ftp: partial %s %u+%u\n", - search[dir][i].pattern, ntohl(th->seq), datalen); + nf_ct_helper_log(skb, ct, "partial matching of `%s'", + search[dir][i].pattern); ret = NF_DROP; goto out; } else if (found == 0) { /* No match */ @@ -450,6 +450,7 @@ skip_nl_seq: exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -500,9 +501,10 @@ skip_nl_seq: protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 962795e839a..7df7b36d2e2 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -623,7 +623,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_h245: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process H.245 message"); return NF_DROP; } @@ -1197,7 +1197,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_q931: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process Q.931 message"); return NF_DROP; } @@ -1795,7 +1795,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - net_info_ratelimited("nf_ct_ras: packet dropped\n"); + nf_ct_helper_log(skb, ct, "cannot process RAS message"); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 884f2b39319..013cdf69fe2 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -28,6 +28,7 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_log.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; @@ -236,7 +237,9 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ - if (help->helper != helper) { + struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); + + if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -332,6 +335,24 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); +__printf(3, 4) +void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, + const char *fmt, ...) +{ + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; + + /* Called from the helper function, this call never fails */ + help = nfct_help(ct); + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + + nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + "nf_ct_%s: dropping packet: %s ", helper->name, fmt); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_log); + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret = 0; @@ -423,44 +444,41 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_init(struct net *net) +int nf_conntrack_helper_pernet_init(struct net *net) { - int err; - net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + return nf_conntrack_helper_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = - nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); - if (!nf_ct_helper_hash) - return -ENOMEM; +void nf_conntrack_helper_pernet_fini(struct net *net) +{ + nf_conntrack_helper_fini_sysctl(net); +} - err = nf_ct_extend_register(&helper_extend); - if (err < 0) - goto err1; +int nf_conntrack_helper_init(void) +{ + int ret; + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = + nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); + if (!nf_ct_helper_hash) + return -ENOMEM; + + ret = nf_ct_extend_register(&helper_extend); + if (ret < 0) { + pr_err("nf_ct_helper: Unable to register helper extension.\n"); + goto out_extend; } - err = nf_conntrack_helper_init_sysctl(net); - if (err < 0) - goto out_sysctl; - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&helper_extend); -err1: +out_extend: nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - return err; + return ret; } -void nf_conntrack_helper_fini(struct net *net) +void nf_conntrack_helper_fini(void) { - nf_conntrack_helper_fini_sysctl(net); - if (net_eq(net, &init_net)) { - nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); - } + nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 3b20aa77cfc..70985c5d0ff 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -194,6 +194,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, + "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -210,8 +212,11 @@ static int help(struct sk_buff *skb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, + "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); goto out; } diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 00000000000..8fe2e99428b --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,112 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/export.h> +#include <linux/jhash.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/slab.h> + +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_labels.h> + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(void) +{ + return nf_ct_extend_register(&labels_extend); +} + +void nf_conntrack_labels_fini(void) +{ + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b23..5d60e04f967 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> +#include <net/netfilter/nf_conntrack_labels.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -323,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -463,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -561,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -662,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -921,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -937,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1211,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat") < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; @@ -1229,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); #else err = -EOPNOTSUPP; @@ -1465,6 +1510,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, #endif static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + +static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1510,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1598,6 +1673,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; @@ -1705,6 +1782,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { enum ip_conntrack_events events; + if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) + return -EINVAL; + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) @@ -1716,6 +1796,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1983,6 +2067,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; @@ -2011,6 +2097,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index cc7669ef0b9..e6678d2b624 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -14,7 +14,7 @@ * Limitations: * - We blindly assume that control connections are always * established in PNS->PAC direction. This is a violation - * of RFFC2673 + * of RFC 2637 * - We can only support one single call within each session * TODO: * - testing of incoming PPTP calls diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 51e928db48c..58ab4050830 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -212,8 +212,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net, #endif } -static int -nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -242,8 +241,9 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -int nf_conntrack_l3proto_register(struct net *net, +int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -254,22 +254,11 @@ int nf_conntrack_l3proto_register(struct net *net, return ret; } - ret = nf_ct_l3proto_register_sysctl(net, proto); - if (ret < 0) - return ret; - - if (net == &init_net) { - ret = nf_conntrack_l3proto_register_net(proto); - if (ret < 0) - nf_ct_l3proto_unregister_sysctl(net, proto); - } - - return ret; + return nf_ct_l3proto_register_sysctl(net, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -static void -nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= AF_MAX); @@ -283,19 +272,17 @@ nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_conntrack_l3proto_unregister(struct net *net, +void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - if (net == &init_net) - nf_conntrack_l3proto_unregister_net(proto); - nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) @@ -376,8 +363,7 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -static int -nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -431,8 +417,9 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); -int nf_conntrack_l4proto_register(struct net *net, +int nf_ct_l4proto_pernet_register(struct net *net, struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -452,22 +439,13 @@ int nf_conntrack_l4proto_register(struct net *net, if (ret < 0) goto out; - if (net == &init_net) { - ret = nf_conntrack_l4proto_register_net(l4proto); - if (ret < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - goto out; - } - } - pn->users++; out: return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -static void -nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -482,15 +460,13 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); -void nf_conntrack_l4proto_unregister(struct net *net, +void nf_ct_l4proto_pernet_unregister(struct net *net, struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; - if (net == &init_net) - nf_conntrack_l4proto_unregister_net(l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; @@ -501,11 +477,10 @@ void nf_conntrack_l4proto_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); -int nf_conntrack_proto_init(struct net *net) +int nf_conntrack_proto_pernet_init(struct net *net) { - unsigned int i; int err; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -520,19 +495,12 @@ int nf_conntrack_proto_init(struct net *net) if (err < 0) return err; - if (net == &init_net) { - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); - } - pn->users++; return 0; } -void nf_conntrack_proto_fini(struct net *net) +void nf_conntrack_proto_pernet_fini(struct net *net) { - unsigned int i; struct nf_proto_net *pn = nf_ct_l4proto_net(net, &nf_conntrack_l4proto_generic); @@ -540,9 +508,21 @@ void nf_conntrack_proto_fini(struct net *net) nf_ct_l4proto_unregister_sysctl(net, pn, &nf_conntrack_l4proto_generic); - if (net == &init_net) { - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); - } +} + +int nf_conntrack_proto_init(void) +{ + unsigned int i; + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + return 0; +} + +void nf_conntrack_proto_fini(void) +{ + unsigned int i; + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a8ae287bc7a..432f9578000 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -935,32 +935,27 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { static __net_init int dccp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &dccp_proto4); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp4 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &dccp_proto6); + ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_dccp6 :protocol register failed.\n"); + pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); goto cleanup_dccp4; } return 0; cleanup_dccp4: - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); out: return ret; } static __net_exit void dccp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &dccp_proto6); - nf_conntrack_l4proto_unregister(net, - &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); + nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); } static struct pernet_operations dccp_net_ops = { @@ -972,11 +967,33 @@ static struct pernet_operations dccp_net_ops = { static int __init nf_conntrack_proto_dccp_init(void) { - return register_pernet_subsys(&dccp_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&dccp_proto4); + if (ret < 0) + goto out_dccp4; + + ret = nf_ct_l4proto_register(&dccp_proto6); + if (ret < 0) + goto out_dccp6; + + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&dccp_proto6); +out_dccp6: + nf_ct_l4proto_unregister(&dccp_proto4); +out_dccp4: + return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { + nf_ct_l4proto_unregister(&dccp_proto6); + nf_ct_l4proto_unregister(&dccp_proto4); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index b09b7af7f6f..bd7d01d9c7e 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -397,15 +397,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); if (ret < 0) - pr_err("nf_conntrack_l4proto_gre4 :protocol register failed.\n"); + pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; } static void proto_gre_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -418,11 +418,26 @@ static struct pernet_operations proto_gre_net_ops = { static int __init nf_ct_proto_gre_init(void) { - return register_pernet_subsys(&proto_gre_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + + ret = register_pernet_subsys(&proto_gre_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); +out_gre4: + return ret; } static void __exit nf_ct_proto_gre_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c746d61f83e..480f616d593 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -853,33 +853,28 @@ static int sctp_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp4 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_sctp6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_sctp6 :protocol register failed.\n"); + pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); goto cleanup_sctp4; } return 0; cleanup_sctp4: - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void sctp_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(net, - &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); } static struct pernet_operations sctp_net_ops = { @@ -891,11 +886,33 @@ static struct pernet_operations sctp_net_ops = { static int __init nf_conntrack_proto_sctp_init(void) { - return register_pernet_subsys(&sctp_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); + if (ret < 0) + goto out_sctp4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + if (ret < 0) + goto out_sctp6; + + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); +out_sctp6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); +out_sctp4: + return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4b66df20928..157489581c3 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -336,30 +336,28 @@ static int udplite_net_init(struct net *net) { int ret = 0; - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); goto out; } - ret = nf_conntrack_l4proto_register(net, - &nf_conntrack_l4proto_udplite6); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); if (ret < 0) { - pr_err("nf_conntrack_l4proto_udplite4 :protocol register failed.\n"); + pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); goto cleanup_udplite4; } return 0; cleanup_udplite4: - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); out: return ret; } static void udplite_net_exit(struct net *net) { - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); } static struct pernet_operations udplite_net_ops = { @@ -371,11 +369,33 @@ static struct pernet_operations udplite_net_ops = { static int __init nf_conntrack_proto_udplite_init(void) { - return register_pernet_subsys(&udplite_net_ops); + int ret; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (ret < 0) + goto out_udplite4; + + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (ret < 0) + goto out_udplite6; + + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + + return 0; +out_pernet: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); +out_udplite6: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +out_udplite4: + return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); unregister_pernet_subsys(&udplite_net_ops); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 295429f3908..4a2134fd3fc 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -138,6 +138,7 @@ static int help(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); ret = NF_DROP; goto out; } @@ -151,8 +152,10 @@ static int help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index df8f4f28448..069229d919b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1095,8 +1095,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, port = simple_strtoul(*dptr + mediaoff, NULL, 10); if (port == 0) continue; - if (port < 1024 || port > 65535) + if (port < 1024 || port > 65535) { + nf_ct_helper_log(skb, ct, "wrong port %u", port); return NF_DROP; + } /* The media description overrides the session description. */ maddr_len = 0; @@ -1107,15 +1109,20 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); } else if (caddr_len) memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else + else { + nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; + } ret = set_expected_rtp_rtcp(skb, protoff, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, + "cannot add expectation for voice"); return ret; + } /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { @@ -1123,8 +1130,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, dptr, datalen, mediaoff, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, &rtp_addr); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP"); return ret; + } } i++; } @@ -1258,9 +1267,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, &matchoff, &matchlen, &daddr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) return NF_ACCEPT; /* We don't support third-party registrations */ @@ -1273,8 +1283,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, - "expires=", NULL, NULL, &expires) < 0) + "expires=", NULL, NULL, &expires) < 0) { + nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (expires == 0) { ret = NF_ACCEPT; @@ -1282,8 +1294,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, } exp = nf_ct_expect_alloc(ct); - if (!exp) + if (!exp) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } saddr = NULL; if (sip_direct_signalling) @@ -1300,9 +1314,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, exp, matchoff, matchlen); else { - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; - else + } else ret = NF_ACCEPT; } nf_ct_expect_put(exp); @@ -1356,9 +1371,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse contact"); return NF_DROP; - else if (ret == 0) + } else if (ret == 0) break; /* We don't support third-party registrations */ @@ -1373,8 +1389,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &c_expires); - if (ret < 0) + if (ret < 0) { + nf_ct_helper_log(skb, ct, "cannot parse expires"); return NF_DROP; + } if (c_expires == 0) break; if (refresh_signalling_expectation(ct, &addr, proto, port, @@ -1408,15 +1426,21 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); - if (!code) + if (!code) { + nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; + } if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { @@ -1440,8 +1464,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. + */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + ct_sip_info->forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1454,11 +1495,15 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, - &matchoff, &matchlen) <= 0) + &matchoff, &matchlen) <= 0) { + nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; + } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) + if (!cseq) { + nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; + } return handler->request(skb, protoff, dataoff, dptr, datalen, cseq); @@ -1481,8 +1526,10 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) + dptr, datalen)) { + nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; + } } return ret; @@ -1546,11 +1593,14 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, end += strlen("\r\n\r\n") + clen; msglen = origlen = end - dptr; - if (msglen > datalen) + if (msglen > datalen) { + nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); return NF_DROP; + } ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); + /* process_sip_* functions report why this packet is dropped */ if (ret != NF_ACCEPT) break; diff = msglen - origlen; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 6e545e26289..87b95a2c270 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -16,6 +16,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_snmp.h> #define SNMP_PORT 161 diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e7185c68481..6bcce401fd1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); + pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; @@ -377,7 +377,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) return 0; out_stat_nf_conntrack: - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); out_nf_conntrack: return -ENOMEM; } @@ -385,7 +385,7 @@ out_nf_conntrack: static void nf_conntrack_standalone_fini_proc(struct net *net) { remove_proc_entry("nf_conntrack", net->proc_net_stat); - proc_net_remove(net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net); } #else static int nf_conntrack_standalone_init_proc(struct net *net) @@ -472,13 +472,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) { - nf_ct_netfilter_header = - register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - } - table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) @@ -502,10 +495,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); out_kmemdup: - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); -out: - printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } @@ -513,8 +502,6 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) { struct ctl_table *table; - if (net_eq(net, &init_net)) - unregister_net_sysctl_table(nf_ct_netfilter_header); table = net->ct.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->ct.sysctl_header); kfree(table); @@ -530,51 +517,85 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ -static int nf_conntrack_net_init(struct net *net) +static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init(net); + ret = nf_conntrack_init_net(net); if (ret < 0) goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; + return 0; out_sysctl: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); out_init: return ret; } -static void nf_conntrack_net_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct net *net) { nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup(net); + nf_conntrack_cleanup_net(net); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_net_init, - .exit = nf_conntrack_net_exit, + .init = nf_conntrack_pernet_init, + .exit = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) { - return register_pernet_subsys(&nf_conntrack_net_ops); + int ret = nf_conntrack_init_start(); + if (ret < 0) + goto out_start; + +#ifdef CONFIG_SYSCTL + nf_ct_netfilter_header = + register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) { + pr_err("nf_conntrack: can't register to sysctl.\n"); + goto out_sysctl; + } +#endif + + ret = register_pernet_subsys(&nf_conntrack_net_ops); + if (ret < 0) + goto out_pernet; + + nf_conntrack_init_end(); + return 0; + +out_pernet: +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +out_sysctl: +#endif + nf_conntrack_cleanup_end(); +out_start: + return ret; } static void __exit nf_conntrack_standalone_fini(void) { + nf_conntrack_cleanup_start(); unregister_pernet_subsys(&nf_conntrack_net_ops); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(nf_ct_netfilter_header); +#endif nf_conntrack_cleanup_end(); } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 81fc61c0526..e9936c83020 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -60,8 +60,10 @@ static int tftp_help(struct sk_buff *skb, nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); exp = nf_ct_expect_alloc(ct); - if (exp == NULL) + if (exp == NULL) { + nf_ct_helper_log(skb, ct, "cannot alloc expectation"); return NF_DROP; + } tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), @@ -74,8 +76,10 @@ static int tftp_help(struct sk_buff *skb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(skb, ctinfo, exp); - else if (nf_ct_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; + } nf_ct_expect_put(exp); break; case TFTP_OPCODE_DATA: diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index a878ce5b252..93da609d9d2 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -37,24 +37,15 @@ static struct nf_ct_ext_type timeout_extend __read_mostly = { .id = NF_CT_EXT_TIMEOUT, }; -int nf_conntrack_timeout_init(struct net *net) +int nf_conntrack_timeout_init(void) { - int ret = 0; - - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&timeout_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_timeout: Unable to register " - "timeout extension.\n"); - return ret; - } - } - - return 0; + int ret = nf_ct_extend_register(&timeout_extend); + if (ret < 0) + pr_err("nf_ct_timeout: Unable to register timeout extension.\n"); + return ret; } -void nf_conntrack_timeout_fini(struct net *net) +void nf_conntrack_timeout_fini(void) { - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&timeout_extend); + nf_ct_extend_unregister(&timeout_extend); } diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7ea8026f07c..902fb0a6b38 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -88,37 +88,28 @@ static void nf_conntrack_tstamp_fini_sysctl(struct net *net) } #endif -int nf_conntrack_tstamp_init(struct net *net) +int nf_conntrack_tstamp_pernet_init(struct net *net) { - int ret; - net->ct.sysctl_tstamp = nf_ct_tstamp; + return nf_conntrack_tstamp_init_sysctl(net); +} - if (net_eq(net, &init_net)) { - ret = nf_ct_extend_register(&tstamp_extend); - if (ret < 0) { - printk(KERN_ERR "nf_ct_tstamp: Unable to register " - "extension\n"); - goto out_extend_register; - } - } +void nf_conntrack_tstamp_pernet_fini(struct net *net) +{ + nf_conntrack_tstamp_fini_sysctl(net); + nf_ct_extend_unregister(&tstamp_extend); +} - ret = nf_conntrack_tstamp_init_sysctl(net); +int nf_conntrack_tstamp_init(void) +{ + int ret; + ret = nf_ct_extend_register(&tstamp_extend); if (ret < 0) - goto out_sysctl; - - return 0; - -out_sysctl: - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); -out_extend_register: + pr_err("nf_ct_tstamp: Unable to register extension\n"); return ret; } -void nf_conntrack_tstamp_fini(struct net *net) +void nf_conntrack_tstamp_fini(void) { - nf_conntrack_tstamp_fini_sysctl(net); - if (net_eq(net, &init_net)) - nf_ct_extend_unregister(&tstamp_extend); + nf_ct_extend_unregister(&tstamp_extend); } diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 42d33788117..3b67c9d1127 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -56,15 +56,19 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } sprintf(buffer, "%u", port); ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index e839b97b286..e84a578dbe3 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -96,8 +96,10 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; + } buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer), &newaddr, port); @@ -113,6 +115,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, return NF_ACCEPT; out: + nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 1fedee6e7fb..f02b3605823 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -56,14 +56,18 @@ static unsigned int help(struct sk_buff *skb, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; + } ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, protoff, matchoff, matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + } return ret; } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 16303c75221..96ccdf78a29 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; union nf_inet_addr newaddr; @@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = ct_sip_info->forced_dport ? : + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -156,8 +159,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &addr, &port) > 0 && !map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP message"); return NF_DROP; + } request = 1; } else request = 0; @@ -190,8 +195,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, olen = *datalen; if (!map_addr(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, &addr, port)) + matchoff, matchlen, &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle Via header"); return NF_DROP; + } matchend = matchoff + matchlen + *datalen - olen; @@ -206,8 +213,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, &ct->tuplehash[!dir].tuple.dst.u3, true); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle maddr"); return NF_DROP; + } } /* The received= parameter (RFC 2361) contains the address @@ -222,6 +231,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, false); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, poff, plen, buffer, buflen)) + nf_ct_helper_log(skb, ct, "cannot mangle received"); return NF_DROP; } @@ -235,8 +245,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - poff, plen, buffer, buflen)) + poff, plen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle rport"); return NF_DROP; + } } } @@ -250,13 +262,36 @@ next: &addr, &port) > 0) { if (!map_addr(skb, protoff, dataoff, dptr, datalen, matchoff, matchlen, - &addr, port)) + &addr, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle contact"); return NF_DROP; + } } if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) + !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) { + nf_ct_helper_log(skb, ct, "cannot mangle SIP from/to"); return NF_DROP; + } + + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + uh = (void *)skb->data + protoff; + uh->dest = ct_sip_info->forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, + 0, 0, NULL, 0)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + } return NF_ACCEPT; } @@ -311,8 +346,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); union nf_inet_addr newaddr; u_int16_t port; + __be16 srcport; char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; @@ -326,8 +363,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = ct_sip_info->forced_dport ? : + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); @@ -351,15 +389,19 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SIP"); return NF_DROP; + } if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, - matchoff, matchlen, buffer, buflen)) + matchoff, matchlen, buffer, buflen)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); goto err; + } } return NF_ACCEPT; @@ -552,14 +594,18 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, } } - if (port == 0) + if (port == 0) { + nf_ct_helper_log(skb, ct, "all ports in use for SDP media"); goto err1; + } /* Update media port. */ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, - mediaoff, medialen, port)) + mediaoff, medialen, port)) { + nf_ct_helper_log(skb, ct, "cannot mangle SDP message"); goto err2; + } return NF_ACCEPT; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index ccabbda71a3..7f67e1d5310 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -28,8 +28,10 @@ static unsigned int help(struct sk_buff *skb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) { + nf_ct_helper_log(skb, exp->master, "cannot add expectation"); return NF_DROP; + } return NF_ACCEPT; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 58a09b7c3f6..d578ec25171 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -36,8 +36,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT]; -static DEFINE_MUTEX(nfnl_mutex); +static struct { + struct mutex mutex; + const struct nfnetlink_subsystem __rcu *subsys; +} table[NFNL_SUBSYS_COUNT]; static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, @@ -48,27 +50,32 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, }; -void nfnl_lock(void) +void nfnl_lock(__u8 subsys_id) { - mutex_lock(&nfnl_mutex); + mutex_lock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_lock); -void nfnl_unlock(void) +void nfnl_unlock(__u8 subsys_id) { - mutex_unlock(&nfnl_mutex); + mutex_unlock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_unlock); +static struct mutex *nfnl_get_lock(__u8 subsys_id) +{ + return &table[subsys_id].mutex; +} + int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - if (subsys_table[n->subsys_id]) { - nfnl_unlock(); + nfnl_lock(n->subsys_id); + if (table[n->subsys_id].subsys) { + nfnl_unlock(n->subsys_id); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); - nfnl_unlock(); + rcu_assign_pointer(table[n->subsys_id].subsys, n); + nfnl_unlock(n->subsys_id); return 0; } @@ -76,9 +83,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - subsys_table[n->subsys_id] = NULL; - nfnl_unlock(); + nfnl_lock(n->subsys_id); + table[n->subsys_id].subsys = NULL; + nfnl_unlock(n->subsys_id); synchronize_rcu(); return 0; } @@ -91,7 +98,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return rcu_dereference(subsys_table[subsys_id]); + return rcu_dereference(table[subsys_id].subsys); } static inline const struct nfnl_callback * @@ -175,6 +182,7 @@ replay: struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; + __u8 subsys_id = NFNL_SUBSYS_ID(type); err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); @@ -189,10 +197,9 @@ replay: rcu_read_unlock(); } else { rcu_read_unlock(); - nfnl_lock(); - if (rcu_dereference_protected( - subsys_table[NFNL_SUBSYS_ID(type)], - lockdep_is_held(&nfnl_mutex)) != ss || + nfnl_lock(subsys_id); + if (rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(nfnl_get_lock(subsys_id))) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -200,7 +207,7 @@ replay: (const struct nlattr **)cda); else err = -EINVAL; - nfnl_unlock(); + nfnl_unlock(subsys_id); } if (err == -EAGAIN) goto replay; @@ -267,6 +274,11 @@ static struct pernet_operations nfnetlink_net_ops = { static int __init nfnetlink_init(void) { + int i; + + for (i=0; i<NFNL_SUBSYS_COUNT; i++) + mutex_init(&table[i].mutex); + pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 7b3a9e5999c..686c7715d77 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1323,12 +1323,12 @@ int xt_proto_init(struct net *net, u_int8_t af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); out: return -1; #endif @@ -1342,15 +1342,15 @@ void xt_proto_fini(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(net, buf); + remove_proc_entry(buf, net->proc_net); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bde009ed8d3..a60261cb0e8 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -20,12 +20,8 @@ #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> -static unsigned int xt_ct_target_v0(struct sk_buff *skb, - const struct xt_action_param *par) +static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { - const struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. */ if (skb->nfct != NULL) return XT_CONTINUE; @@ -37,21 +33,22 @@ static unsigned int xt_ct_target_v0(struct sk_buff *skb, return XT_CONTINUE; } -static unsigned int xt_ct_target_v1(struct sk_buff *skb, +static unsigned int xt_ct_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_ct_target_info_v1 *info = par->targinfo; + const struct xt_ct_target_info *info = par->targinfo; struct nf_conn *ct = info->ct; - /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) - return XT_CONTINUE; + return xt_ct_target(skb, ct); +} - atomic_inc(&ct->ct_general.use); - skb->nfct = &ct->ct_general; - skb->nfctinfo = IP_CT_NEW; +static unsigned int xt_ct_target_v1(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct xt_ct_target_info_v1 *info = par->targinfo; + struct nf_conn *ct = info->ct; - return XT_CONTINUE; + return xt_ct_target(skb, ct); } static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) @@ -104,67 +101,6 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, return 0; } -static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) -{ - struct xt_ct_target_info *info = par->targinfo; - struct nf_conntrack_tuple t; - struct nf_conn *ct; - int ret = -EOPNOTSUPP; - - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - - if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - goto out; - } - -#ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) - goto err1; -#endif - - ret = nf_ct_l3proto_try_module_get(par->family); - if (ret < 0) - goto err1; - - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); - ret = PTR_ERR(ct); - if (IS_ERR(ct)) - goto err2; - - ret = 0; - if ((info->ct_events || info->exp_events) && - !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) - goto err3; - - if (info->helper[0]) { - ret = xt_ct_set_helper(ct, info->helper, par); - if (ret < 0) - goto err3; - } - - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); - - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &par->net->ct.tmpl); -out: - info->ct = ct; - return 0; - -err3: - nf_conntrack_free(ct); -err2: - nf_ct_l3proto_module_put(par->family); -err1: - return ret; -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) { @@ -242,16 +178,13 @@ out: #endif } -static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +static int xt_ct_tg_check(const struct xt_tgchk_param *par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); @@ -309,20 +242,49 @@ err1: return ret; } -static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_ct_target_info *info = par->targinfo; - struct nf_conn *ct = info->ct; - struct nf_conn_help *help; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + }; + int ret; - if (!nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if (help) - module_put(help->helper->me); + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; - nf_ct_l3proto_module_put(par->family); - } - nf_ct_put(info->ct); + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + ret = xt_ct_tg_check(par, &info_v1); + if (ret < 0) + return ret; + + info->ct = info_v1.ct; + + return ret; +} + +static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); } static void xt_ct_destroy_timeout(struct nf_conn *ct) @@ -343,9 +305,9 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct) #endif } -static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, + struct xt_ct_target_info_v1 *info) { - struct xt_ct_target_info_v1 *info = par->targinfo; struct nf_conn *ct = info->ct; struct nf_conn_help *help; @@ -361,6 +323,26 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_put(info->ct); } +static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct xt_ct_target_info_v1 info_v1 = { + .flags = info->flags, + .zone = info->zone, + .ct_events = info->ct_events, + .exp_events = info->exp_events, + .ct = info->ct, + }; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); + + xt_ct_tg_destroy(par, &info_v1); +} + +static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) +{ + xt_ct_tg_destroy(par, par->targinfo); +} + static struct xt_target xt_ct_tg_reg[] __read_mostly = { { .name = "CT", @@ -383,6 +365,17 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + .name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + .target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, }; static unsigned int diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 00000000000..12d4da8e6c7 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn <willemb@google.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/filter.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 00000000000..9f8719df200 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a9d7af953ce..98218c896d2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -867,7 +867,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); if (!hashlimit_net->ip6t_hashlimit) { - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); return -ENOMEM; } #endif @@ -897,9 +897,9 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net) hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); - proc_net_remove(net, "ipt_hashlimit"); + remove_proc_entry("ipt_hashlimit", net->proc_net); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - proc_net_remove(net, "ip6t_hashlimit"); + remove_proc_entry("ip6t_hashlimit", net->proc_net); #endif } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 978efc9b555..31bf233dae9 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -643,7 +643,7 @@ static void __net_exit recent_proc_net_exit(struct net *net) recent_net->xt_recent = NULL; spin_unlock_bh(&recent_lock); - proc_net_remove(net, "xt_recent"); + remove_proc_entry("xt_recent", net->proc_net); } #else static inline int recent_proc_net_init(struct net *net) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c0353d55d56..3d55e0c713e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2145,7 +2145,7 @@ static const struct net_proto_family netlink_family_ops = { static int __net_init netlink_net_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) return -ENOMEM; #endif return 0; @@ -2154,7 +2154,7 @@ static int __net_init netlink_net_init(struct net *net) static void __net_exit netlink_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "netlink"); + remove_proc_entry("netlink", net->proc_net); #endif } @@ -2185,7 +2185,6 @@ static struct pernet_operations __net_initdata netlink_net_ops = { static int __init netlink_proto_init(void) { - struct sk_buff *dummy_skb; int i; unsigned long limit; unsigned int order; @@ -2194,7 +2193,7 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7261eb81974..297b07a029d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1452,9 +1452,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops); + proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops); + proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops); out: return rc; fail: @@ -1482,9 +1482,9 @@ static void __exit nr_exit(void) { int i; - proc_net_remove(&init_net, "nr"); - proc_net_remove(&init_net, "nr_neigh"); - proc_net_remove(&init_net, "nr_nodes"); + remove_proc_entry("nr", init_net.proc_net); + remove_proc_entry("nr_neigh", init_net.proc_net); + remove_proc_entry("nr_nodes", init_net.proc_net); nr_loopback_clear(); nr_rt_free(); diff --git a/net/nfc/core.c b/net/nfc/core.c index aa64ea44167..25522e56d35 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -338,7 +338,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; - if (dev->ops->check_presence) + if (dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } @@ -429,7 +429,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); - if (!rc && dev->ops->check_presence) + if (!rc && dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { @@ -684,11 +684,6 @@ static void nfc_release(struct device *d) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - if (dev->ops->check_presence) { - del_timer_sync(&dev->check_pres_timer); - cancel_work_sync(&dev->check_pres_work); - } - nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); kfree(dev); @@ -706,15 +701,16 @@ static void nfc_check_pres_work(struct work_struct *work) rc = dev->ops->check_presence(dev, dev->active_target); if (rc == -EOPNOTSUPP) goto exit; - if (!rc) { - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); - } else { + if (rc) { u32 active_target_idx = dev->active_target->idx; device_unlock(&dev->dev); nfc_target_lost(dev, active_target_idx); return; } + + if (!dev->shutting_down) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } exit: @@ -761,6 +757,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -778,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; @@ -853,26 +852,27 @@ void nfc_unregister_device(struct nfc_dev *dev) id = dev->idx; - mutex_lock(&nfc_devlist_mutex); - nfc_devlist_generation++; - - /* lock to avoid unregistering a device while an operation - is in progress */ - device_lock(&dev->dev); - device_del(&dev->dev); - device_unlock(&dev->dev); + if (dev->ops->check_presence) { + device_lock(&dev->dev); + dev->shutting_down = true; + device_unlock(&dev->dev); + del_timer_sync(&dev->check_pres_timer); + cancel_work_sync(&dev->check_pres_work); + } - mutex_unlock(&nfc_devlist_mutex); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); nfc_llcp_unregister_device(dev); - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was removed\n", - dev_name(&dev->dev)); + mutex_lock(&nfc_devlist_mutex); + nfc_devlist_generation++; + device_del(&dev->dev); + mutex_unlock(&nfc_devlist_mutex); ida_simple_remove(&nfc_index_ida, id); - } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 7d99410e6c1..64f922be928 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -280,14 +280,19 @@ static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) { u8 param[2]; + size_t param_len = 2; /* TODO: Find out what the identity reference data is * and fill param with it. HCI spec 6.1.3.5 */ pr_debug("\n"); + if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) + param_len = 0; + return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, - NFC_HCI_ADM_CLEAR_ALL_PIPE, param, 2, NULL); + NFC_HCI_ADM_CLEAR_ALL_PIPE, param, param_len, + NULL); } int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7bea574d593..91020b210d8 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -57,6 +57,8 @@ static void nfc_hci_msg_tx_work(struct work_struct *work) int r = 0; mutex_lock(&hdev->msg_tx_mutex); + if (hdev->shutting_down) + goto exit; if (hdev->cmd_pending_msg) { if (timer_pending(&hdev->cmd_timer) == 0) { @@ -295,6 +297,12 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, goto exit; } + if (hdev->ops->event_received) { + r = hdev->ops->event_received(hdev, gate, event, skb); + if (r <= 0) + goto exit_noskb; + } + switch (event) { case NFC_HCI_EVT_TARGET_DISCOVERED: if (skb->len < 1) { /* no status data? */ @@ -320,17 +328,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, r = nfc_hci_target_discovered(hdev, gate); break; default: - if (hdev->ops->event_received) { - hdev->ops->event_received(hdev, gate, event, skb); - return; - } - + pr_info("Discarded unknown event %x to gate %x\n", event, gate); + r = -EINVAL; break; } exit: kfree_skb(skb); +exit_noskb: if (r) { /* TODO: There was an error dispatching the event, * how to propagate up to nfc core? @@ -669,8 +675,10 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) if (hdev->ops->tm_send) return hdev->ops->tm_send(hdev, skb); - else - return -ENOTSUPP; + + kfree_skb(skb); + + return -ENOTSUPP; } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -787,7 +795,9 @@ static struct nfc_ops hci_nfc_ops = { struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, + unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -813,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { @@ -830,6 +840,8 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + hdev->quirks = quirks; + return hdev; } EXPORT_SYMBOL(nfc_hci_allocate_device); @@ -868,6 +880,28 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) { struct hci_msg *msg, *n; + mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->cmd_pending_msg) { + if (hdev->cmd_pending_msg->cb) + hdev->cmd_pending_msg->cb( + hdev->cmd_pending_msg->cb_context, + NULL, -ESHUTDOWN); + kfree(hdev->cmd_pending_msg); + hdev->cmd_pending_msg = NULL; + } + + hdev->shutting_down = true; + + mutex_unlock(&hdev->msg_tx_mutex); + + del_timer_sync(&hdev->cmd_timer); + cancel_work_sync(&hdev->msg_tx_work); + + cancel_work_sync(&hdev->msg_rx_work); + + nfc_unregister_device(hdev->ndev); + skb_queue_purge(&hdev->rx_hcp_frags); skb_queue_purge(&hdev->msg_rx_queue); @@ -876,13 +910,6 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev) skb_queue_purge(&msg->msg_frags); kfree(msg); } - - del_timer_sync(&hdev->cmd_timer); - - nfc_unregister_device(hdev->ndev); - - cancel_work_sync(&hdev->msg_tx_work); - cancel_work_sync(&hdev->msg_rx_work); } EXPORT_SYMBOL(nfc_hci_unregister_device); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index bc308a7ca60..b6b4109f234 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -105,6 +105,13 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, } mutex_lock(&hdev->msg_tx_mutex); + + if (hdev->shutting_down) { + err = -ESHUTDOWN; + mutex_unlock(&hdev->msg_tx_mutex); + goto out_skb_err; + } + list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue); mutex_unlock(&hdev->msg_tx_mutex); diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index df24be48d4d..c6bc3bd9505 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -304,6 +304,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index ec43914c92a..746f5a2f980 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -54,7 +54,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); if (local == NULL) return; @@ -550,14 +549,13 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) pr_err("No LLCP device\n"); return -ENODEV; } + if (gb_len < 3) + return -EINVAL; memset(local->remote_gb, 0, NFC_MAX_GT_LEN); memcpy(local->remote_gb, gb, gb_len); local->remote_gb_len = gb_len; - if (local->remote_gb == NULL || local->remote_gb_len == 0) - return -ENODEV; - if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); return -EINVAL; @@ -668,6 +666,8 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); @@ -781,9 +781,15 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, /* There is no sequence with UI frames */ skb_pull(skb, LLCP_HEADER_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * UI frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); + kfree_skb(skb); } nfc_llcp_sock_put(llcp_sock); @@ -976,9 +982,15 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, pr_err("Received out of sequence I PDU\n"); skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); - if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { - pr_err("receive queue is full\n"); - skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + /* + * I frames will be freed from the socket layer, so we + * need to keep them alive until someone receives them. + */ + skb_get(skb); + } else { + pr_err("Receive queue is full\n"); + kfree_skb(skb); } } @@ -1245,6 +1257,8 @@ static void nfc_llcp_rx_work(struct work_struct *work) print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + __net_timestamp(skb); + nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); switch (ptype) { @@ -1296,6 +1310,13 @@ static void nfc_llcp_rx_work(struct work_struct *work) local->rx_pending = NULL; } +static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) +{ + local->rx_pending = skb; + del_timer(&local->link_timer); + schedule_work(&local->rx_work); +} + void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; @@ -1306,9 +1327,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); } int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) @@ -1319,9 +1338,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) if (local == NULL) return -ENODEV; - local->rx_pending = skb_get(skb); - del_timer(&local->link_timer); - schedule_work(&local->rx_work); + __nfc_llcp_recv(local, skb); return 0; } diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 0d62366f8cc..0eae5c50950 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -121,7 +121,6 @@ struct nfc_llcp_sock { struct sk_buff_head tx_queue; struct sk_buff_head tx_pending_queue; - struct sk_buff_head tx_backlog_queue; struct list_head accept_queue; struct sock *parent; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index fea22eb41b8..5332751943a 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -672,25 +672,27 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; } + sock_recv_timestamp(msg, sk, skb); + if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); - struct sockaddr_nfc_llcp sockaddr; + struct sockaddr_nfc_llcp *sockaddr = + (struct sockaddr_nfc_llcp *) msg->msg_name; - pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); - sockaddr.sa_family = AF_NFC; - sockaddr.nfc_protocol = NFC_PROTO_NFC_DEP; - sockaddr.dsap = ui_cb->dsap; - sockaddr.ssap = ui_cb->ssap; + pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); - memcpy(msg->msg_name, &sockaddr, sizeof(sockaddr)); - msg->msg_namelen = sizeof(sockaddr); + sockaddr->sa_family = AF_NFC; + sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; + sockaddr->dsap = ui_cb->dsap; + sockaddr->ssap = ui_cb->ssap; } /* Mark read part of skb as used */ @@ -806,7 +808,6 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) llcp_sock->reserved_ssap = LLCP_SAP_MAX; skb_queue_head_init(&llcp_sock->tx_queue); skb_queue_head_init(&llcp_sock->tx_pending_queue); - skb_queue_head_init(&llcp_sock->tx_backlog_queue); INIT_LIST_HEAD(&llcp_sock->accept_queue); if (sock != NULL) @@ -821,7 +822,6 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); - skb_queue_purge(&sock->tx_backlog_queue); list_del_init(&sock->accept_queue); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf03..48ada0ec749 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786..504b883439f 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db34324..9dc537df46c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); @@ -1989,10 +1989,9 @@ static struct pernet_operations ovs_net_ops = { static int __init dp_init(void) { - struct sk_buff *dummy_skb; int err; - BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); pr_info("Open vSwitch switching datapath\n"); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 5d460c37df0..0531de6c7a4 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -69,7 +69,6 @@ static int internal_dev_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - dev->addr_assign_type &= ~NET_ADDR_RANDOM; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } @@ -98,7 +97,7 @@ static int internal_dev_stop(struct net_device *netdev) static void internal_dev_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "openvswitch"); + strlcpy(info->driver, "openvswitch", sizeof(info->driver)); } static const struct ethtool_ops internal_dev_ethtool_ops = { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c111bd0e083..c7bfeff1076 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3828,7 +3828,7 @@ static int __net_init packet_net_init(struct net *net) mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); - if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops)) return -ENOMEM; return 0; @@ -3836,7 +3836,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { - proc_net_remove(net, "packet"); + remove_proc_entry("packet", net->proc_net); } static struct pernet_operations packet_net_ops = { diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5bf6341e2dd..45a7df6575d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -320,7 +320,7 @@ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); - if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) + if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops)) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); @@ -331,7 +331,7 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { - proc_net_remove(net, "phonet"); + remove_proc_entry("phonet", net->proc_net); } static struct pernet_operations phonet_net_ops = { @@ -348,7 +348,7 @@ int __init phonet_device_init(void) if (err) return err; - proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops); + proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) @@ -361,7 +361,7 @@ void phonet_device_exit(void) rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); - proc_net_remove(&init_net, "pnresource"); + remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c4719ce604c..b768fe9d5e7 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1575,10 +1575,13 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); + proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops); + proc_create("rose_neigh", S_IRUGO, init_net.proc_net, + &rose_neigh_fops); + proc_create("rose_nodes", S_IRUGO, init_net.proc_net, + &rose_nodes_fops); + proc_create("rose_routes", S_IRUGO, init_net.proc_net, + &rose_routes_fops); out: return rc; fail: @@ -1605,10 +1608,10 @@ static void __exit rose_exit(void) { int i; - proc_net_remove(&init_net, "rose"); - proc_net_remove(&init_net, "rose_neigh"); - proc_net_remove(&init_net, "rose_nodes"); - proc_net_remove(&init_net, "rose_routes"); + remove_proc_entry("rose", init_net.proc_net); + remove_proc_entry("rose_neigh", init_net.proc_net); + remove_proc_entry("rose_nodes", init_net.proc_net); + remove_proc_entry("rose_routes", init_net.proc_net); rose_loopback_clear(); rose_rt_free(); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 05996d0dd82..e61aa6001c6 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -792,10 +793,9 @@ static const struct net_proto_family rxrpc_family_ops = { */ static int __init af_rxrpc_init(void) { - struct sk_buff *dummy_skb; int ret = -1; - BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); rxrpc_epoch = htonl(get_seconds()); @@ -839,8 +839,9 @@ static int __init af_rxrpc_init(void) } #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); + proc_create("rxrpc_conns", 0, init_net.proc_net, + &rxrpc_connection_seq_fops); #endif return 0; @@ -878,8 +879,8 @@ static void __exit af_rxrpc_exit(void) _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove(&init_net, "rxrpc_conns"); - proc_net_remove(&init_net, "rxrpc_calls"); + remove_proc_entry("rxrpc_conns", init_net.proc_net); + remove_proc_entry("rxrpc_calls", init_net.proc_net); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 65d240cbf74..8579c4bb20c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -485,8 +485,9 @@ errout: return err; } -struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct tc_action *a; struct tc_action_ops *a_o; @@ -542,9 +543,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, /* backward compatibility for policer */ if (name == NULL) - err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind); else - err = a_o->init(nla, est, a, ovr, bind); + err = a_o->init(net, nla, est, a, ovr, bind); if (err < 0) goto err_free; @@ -566,8 +567,9 @@ err_out: return ERR_PTR(err); } -struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) +struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, + int bind) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *head = NULL, *act, *act_prev = NULL; @@ -579,7 +581,7 @@ struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, return ERR_PTR(err); for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); if (IS_ERR(act)) goto err; act->order = i; @@ -960,7 +962,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct tc_action *a; u32 seq = n->nlmsg_seq; - act = tcf_action_init(nla, NULL, NULL, ovr, 0); + act = tcf_action_init(net, nla, NULL, NULL, ovr, 0); if (act == NULL) goto done; if (IS_ERR(act)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 2c8ad7c86e4..08fa1e8a4ca 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -51,7 +51,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, +static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_CSUM_MAX + 1]; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 05d60859d8e..fd2b3cff5fa 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -58,8 +58,9 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) }, }; -static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_gact_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_GACT_MAX + 1]; struct tc_gact *parm; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 58fb3c7aab9..e0f6de64afe 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -102,7 +102,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; -static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, +static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9c0fd0c7881..5d676edc22a 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -62,8 +62,9 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_mirred_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, int ovr, + int bind) { struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tc_mirred *parm; @@ -88,7 +89,7 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; } if (parm->ifindex) { - dev = __dev_get_by_index(&init_net, parm->ifindex); + dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b5d029eb44f..876f0ef2969 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -44,7 +44,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, }; -static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, +static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { struct nlattr *tb[TCA_NAT_MAX + 1]; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 45c53ab067a..7ed78c9e505 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -38,8 +38,9 @@ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, }; -static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_pedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tc_pedit *parm; @@ -130,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index a9de23297d4..823463adbd2 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,23 @@ #include <net/act_api.h> #include <net/netlink.h> -#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) -#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + s64 tcfp_burst; + u32 tcfp_mtu; + s64 tcfp_toks; + s64 tcfp_ptoks; + s64 tcfp_mtu_ptoks; + s64 tcfp_t_c; + struct psched_ratecfg rate; + bool rate_present; + struct psched_ratecfg peak; + bool peak_present; +}; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -108,10 +123,6 @@ static void tcf_police_destroy(struct tcf_police *p) write_unlock_bh(&police_lock); gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); - if (p->tcfp_R_tab) - qdisc_put_rtab(p->tcfp_R_tab); - if (p->tcfp_P_tab) - qdisc_put_rtab(p->tcfp_P_tab); /* * gen_estimator est_timer() might access p->tcf_lock * or bstats, wait a RCU grace period before freeing p @@ -130,8 +141,9 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RESULT] = { .type = NLA_U32 }, }; -static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_act_police_locate(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { unsigned int h; int ret = 0, err; @@ -211,26 +223,36 @@ override: } /* No failure allowed after this point */ - if (R_tab != NULL) { - qdisc_put_rtab(police->tcfp_R_tab); - police->tcfp_R_tab = R_tab; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (R_tab) + police->tcfp_mtu = 255 << R_tab->rate.cell_log; + } + if (R_tab) { + police->rate_present = true; + psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + qdisc_put_rtab(R_tab); + } else { + police->rate_present = false; } - if (P_tab != NULL) { - qdisc_put_rtab(police->tcfp_P_tab); - police->tcfp_P_tab = P_tab; + if (P_tab) { + police->peak_present = true; + psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + qdisc_put_rtab(P_tab); + } else { + police->peak_present = false; } if (tb[TCA_POLICE_RESULT]) police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; + police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); + police->tcfp_toks = police->tcfp_burst; + if (police->peak_present) { + police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, + police->tcfp_mtu); + police->tcfp_ptoks = police->tcfp_mtu_ptoks; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; if (tb[TCA_POLICE_AVRATE]) @@ -240,7 +262,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = psched_get_time(); + police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -286,9 +308,9 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = a->priv; - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; spin_lock(&police->tcf_lock); @@ -304,24 +326,25 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { + if (!police->rate_present) { spin_unlock(&police->tcf_lock); return police->tcfp_result; } - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - police->tcfp_t_c, + police->tcfp_burst); + if (police->peak_present) { ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, qdisc_pkt_len(skb)); + if (ptoks > police->tcfp_mtu_ptoks) + ptoks = police->tcfp_mtu_ptoks; + ptoks -= (s64) psched_l2t_ns(&police->peak, + qdisc_pkt_len(skb)); } toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) + if (toks > police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, qdisc_pkt_len(skb)); + toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; @@ -347,15 +370,15 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .index = police->tcf_index, .action = police->tcf_action, .mtu = police->tcfp_mtu, - .burst = police->tcfp_burst, + .burst = PSCHED_NS2TICKS(police->tcfp_burst), .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (police->rate_present) + opt.rate.rate = psched_ratecfg_getrate(&police->rate); + if (police->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3714f60f0b3..7725eb4ab75 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -95,8 +95,9 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; -static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_simp_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_DEF_MAX + 1]; struct tc_defact *parm; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 476e0fac671..cb4221171f9 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -67,8 +67,9 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, }; -static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, - struct tc_action *a, int ovr, int bind) +static int tcf_skbedit_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff55ed6c49b..964f5e4f4b8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -321,7 +321,7 @@ replay: } } - err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -508,7 +508,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_destroy); -int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, +int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, const struct tcf_ext_map *map) { @@ -519,7 +519,7 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, struct tc_action *act; if (map->police && tb[map->police]) { - act = tcf_action_init_1(tb[map->police], rate_tlv, + act = tcf_action_init_1(net, tb[map->police], rate_tlv, "police", TCA_ACT_NOREPLACE, TCA_ACT_BIND); if (IS_ERR(act)) @@ -528,8 +528,9 @@ int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, act->type = TCA_OLD_COMPAT; exts->action = act; } else if (map->action && tb[map->action]) { - act = tcf_action_init(tb[map->action], rate_tlv, NULL, - TCA_ACT_NOREPLACE, TCA_ACT_BIND); + act = tcf_action_init(net, tb[map->action], rate_tlv, + NULL, TCA_ACT_NOREPLACE, + TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 344a11b342e..d76a35d0dc8 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -132,15 +132,16 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; -static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, - unsigned long base, struct nlattr **tb, +static int basic_set_parms(struct net *net, struct tcf_proto *tp, + struct basic_filter *f, unsigned long base, + struct nlattr **tb, struct nlattr *est) { int err = -EINVAL; struct tcf_exts e; struct tcf_ematch_tree t; - err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map); if (err < 0) return err; @@ -162,7 +163,7 @@ errout: return err; } -static int basic_change(struct sk_buff *in_skb, +static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -182,7 +183,7 @@ static int basic_change(struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); } err = -ENOBUFS; @@ -208,7 +209,7 @@ static int basic_change(struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); if (err < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 6db7855b902..3a294eb98d6 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -178,7 +178,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; -static int cls_cgroup_change(struct sk_buff *in_skb, +static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -215,7 +215,8 @@ static int cls_cgroup_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, + &cgroup_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index ce82d0cb1b4..aa36a8c8b33 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -351,7 +351,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static int flow_change(struct sk_buff *in_skb, +static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -397,7 +397,7 @@ static int flow_change(struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 4075a0aef2a..1135d8227f9 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -192,7 +192,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { }; static int -fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, +fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base) { struct fw_head *head = (struct fw_head *)tp->root; @@ -200,7 +200,7 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map); if (err < 0) return err; @@ -233,7 +233,7 @@ errout: return err; } -static int fw_change(struct sk_buff *in_skb, +static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -255,7 +255,7 @@ static int fw_change(struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base); } if (!handle) @@ -282,7 +282,7 @@ static int fw_change(struct sk_buff *in_skb, f->id = handle; - err = fw_change_attrs(tp, f, tb, tca, base); + err = fw_change_attrs(net, tp, f, tb, tca, base); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c10d57bf98f..37da567d833 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -335,9 +335,10 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, }; -static int route4_set_parms(struct tcf_proto *tp, unsigned long base, - struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) +static int route4_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct route4_filter *f, + u32 handle, struct route4_head *head, + struct nlattr **tb, struct nlattr *est, int new) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -346,7 +347,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, struct route4_bucket *b; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map); if (err < 0) return err; @@ -427,7 +428,7 @@ errout: return err; } -static int route4_change(struct sk_buff *in_skb, +static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -457,7 +458,7 @@ static int route4_change(struct sk_buff *in_skb, if (f->bkt) old_handle = f->handle; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 0); if (err < 0) return err; @@ -480,7 +481,7 @@ static int route4_change(struct sk_buff *in_skb, if (f == NULL) goto errout; - err = route4_set_parms(tp, base, f, handle, head, tb, + err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 1); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 494bbb90924..252d8b05872 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -416,7 +416,7 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -static int rsvp_change(struct sk_buff *in_skb, +static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -440,7 +440,7 @@ static int rsvp_change(struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a1293b4ab7a..b86535a4016 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -197,9 +197,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { }; static int -tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, - struct tcindex_data *p, struct tcindex_filter_result *r, - struct nlattr **tb, struct nlattr *est) +tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + u32 handle, struct tcindex_data *p, + struct tcindex_filter_result *r, struct nlattr **tb, + struct nlattr *est) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -208,7 +209,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map); if (err < 0) return err; @@ -332,7 +333,7 @@ errout: } static int -tcindex_change(struct sk_buff *in_skb, +tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { @@ -353,7 +354,8 @@ tcindex_change(struct sk_buff *in_skb, if (err < 0) return err; - return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]); + return tcindex_set_parms(net, tp, base, handle, p, r, tb, + tca[TCA_RATE]); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7c27bc91b5..eb07a1e536e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -488,15 +488,15 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, }; -static int u32_set_parms(struct tcf_proto *tp, unsigned long base, - struct tc_u_hnode *ht, +static int u32_set_parms(struct net *net, struct tcf_proto *tp, + unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est) { int err; struct tcf_exts e; - err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map); + err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map); if (err < 0) return err; @@ -544,7 +544,7 @@ errout: return err; } -static int u32_change(struct sk_buff *in_skb, +static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) @@ -570,7 +570,8 @@ static int u32_change(struct sk_buff *in_skb, if (TC_U32_KEY(n->handle) == 0) return -EINVAL; - return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]); + return u32_set_parms(net, tp, base, n->ht_up, n, tb, + tca[TCA_RATE]); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +657,7 @@ static int u32_change(struct sk_buff *in_skb, } #endif - err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d84f7e734cd..a181b484812 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -493,7 +493,7 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) @@ -502,10 +502,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) qdisc_throttled(wd->qdisc); hrtimer_start(&wd->timer, - ns_to_ktime(PSCHED_TICKS2NS(expires)), + ns_to_ktime(expires), HRTIMER_MODE_ABS); } -EXPORT_SYMBOL(qdisc_watchdog_schedule); +EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { @@ -1768,7 +1768,7 @@ static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(net, "psched", 0, &psched_fops); + e = proc_create("psched", 0, net->proc_net, &psched_fops); if (e == NULL) return -ENOMEM; @@ -1777,7 +1777,7 @@ static int __net_init psched_net_init(struct net *net) static void __net_exit psched_net_exit(struct net *net) { - proc_net_remove(net, "psched"); + remove_proc_entry("psched", net->proc_net); } #else static int __net_init psched_net_init(struct net *net) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d81a447851..ffad48109a2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -896,3 +897,39 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } + +void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +{ + u64 factor; + u64 mult; + int shift; + + r->rate_bps = rate << 3; + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. + */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} +EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 79e8ed4ac7c..03c2692ca01 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -38,6 +38,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> /* HTB algorithm. @@ -71,12 +72,6 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -struct htb_rate_cfg { - u64 rate_bps; - u32 mult; - u32 shift; -}; - /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -124,8 +119,8 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct htb_rate_cfg rate; - struct htb_rate_cfg ceil; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ s64 tokens, ctokens; /* current number of tokens */ @@ -168,45 +163,6 @@ struct htb_sched { struct work_struct work; }; -static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) -{ - return ((u64)len * r->mult) >> r->shift; -} - -static void htb_precompute_ratedata(struct htb_rate_cfg *r) -{ - u64 factor; - u64 mult; - int shift; - - r->shift = 0; - r->mult = 1; - /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. - */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) - break; - } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); - } -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -632,7 +588,7 @@ static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->buffer) toks = cl->buffer; - toks -= (s64) l2t_ns(&cl->rate, bytes); + toks -= (s64) psched_l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -645,7 +601,7 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (s64) l2t_ns(&cl->ceil, bytes); + toks -= (s64) psched_l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -1134,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = cl->rate.rate_bps >> 3; + opt.rate.rate = psched_ratecfg_getrate(&cl->rate); opt.buffer = PSCHED_NS2TICKS(cl->buffer); - opt.ceil.rate = cl->ceil.rate_bps >> 3; + opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1459,8 +1415,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ - cl->tokens = hopt->buffer; - cl->ctokens = hopt->cbuffer; + cl->tokens = PSCHED_TICKS2NS(hopt->buffer); + cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ cl->t_c = psched_get_time(); cl->cmode = HTB_CAN_SEND; @@ -1503,17 +1459,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - cl->buffer = hopt->buffer; - cl->cbuffer = hopt->cbuffer; - - cl->rate.rate_bps = (u64)hopt->rate.rate << 3; - cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; - - htb_precompute_ratedata(&cl->rate); - htb_precompute_ratedata(&cl->ceil); + psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); + psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); - cl->buffer = hopt->buffer << PSCHED_SHIFT; - cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + cl->buffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); sch_tree_unlock(sch); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 4b056c15e90..c8388f3c342 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> +#include <net/sch_generic.h> #include <net/pkt_sched.h> @@ -100,23 +101,21 @@ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ - u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ - u32 mtu; + s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ + s64 mtu; u32 max_size; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + bool peak_present; /* Variables */ - long tokens; /* Current number of B tokens */ - long ptokens; /* Current number of P tokens */ - psched_time_t t_c; /* Time check-point */ + s64 tokens; /* Current number of B tokens */ + s64 ptokens; /* Current number of P tokens */ + s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q, L) qdisc_l2t((q)->R_tab, L) -#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) - static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) skb = q->qdisc->ops->peek(q->qdisc); if (skb) { - psched_time_t now; - long toks; - long ptoks = 0; + s64 now; + s64 toks; + s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = psched_get_time(); - toks = psched_tdiff_bounded(now, q->t_c, q->buffer); + now = ktime_to_ns(ktime_get()); + toks = min_t(s64, now - q->t_c, q->buffer); - if (q->P_tab) { + if (q->peak_present) { ptoks = toks + q->ptokens; - if (ptoks > (long)q->mtu) + if (ptoks > q->mtu) ptoks = q->mtu; - ptoks -= L2T_P(q, len); + ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; - if (toks > (long)q->buffer) + if (toks > q->buffer) toks = q->buffer; - toks -= L2T(q, len); + toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); @@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) return skb; } - qdisc_watchdog_schedule(&q->watchdog, - now + max_t(long, -toks, -ptoks)); + qdisc_watchdog_schedule_ns(&q->watchdog, + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = qopt->mtu; + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = qopt->buffer; + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; - swap(q->R_tab, rtab); - swap(q->P_tab, ptab); + psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + if (ptab) { + psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + q->peak_present = true; + } else { + q->peak_present = false; + } sch_tree_unlock(sch); err = 0; @@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = psched_get_time(); + q->t_c = ktime_to_ns(ktime_get()); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; @@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - - if (q->P_tab) - qdisc_put_rtab(q->P_tab); - if (q->R_tab) - qdisc_put_rtab(q->R_tab); - qdisc_destroy(q->qdisc); } @@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate = q->R_tab->rate; - if (q->P_tab) - opt.peakrate = q->P_tab->rate; + opt.rate.rate = psched_ratecfg_getrate(&q->rate); + if (q->peak_present) + opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - opt.mtu = q->mtu; - opt.buffer = q->buffer; + opt.mtu = PSCHED_NS2TICKS(q->mtu); + opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b45ed1f9692..2f95f5a5145 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -434,8 +434,7 @@ void sctp_association_free(struct sctp_association *asoc) * on our state. */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { - if (timer_pending(&asoc->timers[i]) && - del_timer(&asoc->timers[i])) + if (del_timer(&asoc->timers[i])) sctp_association_put(asoc); } @@ -1497,7 +1496,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) /* Stop the SACK timer. */ timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); } } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index d8420ae614d..ba1dfc3f8de 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -200,27 +200,28 @@ static struct sctp_auth_bytes *sctp_auth_make_key_vector( struct sctp_auth_bytes *new; __u32 len; __u32 offset = 0; + __u16 random_len, hmacs_len, chunks_len = 0; - len = ntohs(random->param_hdr.length) + ntohs(hmacs->param_hdr.length); - if (chunks) - len += ntohs(chunks->param_hdr.length); + random_len = ntohs(random->param_hdr.length); + hmacs_len = ntohs(hmacs->param_hdr.length); + if (chunks) + chunks_len = ntohs(chunks->param_hdr.length); - new = kmalloc(sizeof(struct sctp_auth_bytes) + len, gfp); + len = random_len + hmacs_len + chunks_len; + + new = sctp_auth_create_key(len, gfp); if (!new) return NULL; - new->len = len; - - memcpy(new->data, random, ntohs(random->param_hdr.length)); - offset += ntohs(random->param_hdr.length); + memcpy(new->data, random, random_len); + offset += random_len; if (chunks) { - memcpy(new->data + offset, chunks, - ntohs(chunks->param_hdr.length)); - offset += ntohs(chunks->param_hdr.length); + memcpy(new->data + offset, chunks, chunks_len); + offset += chunks_len; } - memcpy(new->data + offset, hmacs, ntohs(hmacs->param_hdr.length)); + memcpy(new->data + offset, hmacs, hmacs_len); return new; } @@ -350,8 +351,8 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( secret = sctp_auth_asoc_set_secret(ep_key, first_vector, last_vector, gfp); out: - kfree(local_key_vector); - kfree(peer_key_vector); + sctp_auth_key_put(local_key_vector); + sctp_auth_key_put(peer_key_vector); return secret; } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1a9c5fb7731..73aad3d16a4 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -151,9 +151,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ - get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); - ep->last_key = ep->current_key = 0; - ep->key_changed_at = jiffies; + get_random_bytes(ep->secret_key, sizeof(ep->secret_key)); /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); @@ -249,8 +247,6 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) /* Final destructor for endpoint. */ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { - int i; - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free up the HMAC transform. */ @@ -273,8 +269,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); - for (i = 0; i < SCTP_HOW_MANY_SECRETS; ++i) - memset(&ep->secret_key[i], 0, SCTP_SECRET_SIZE); + memset(ep->secret_key, 0, sizeof(ep->secret_key)); /* Remove and free the port */ if (sctp_sk(ep->base.sk)->bind_hash) diff --git a/net/sctp/input.c b/net/sctp/input.c index 8bd3c279427..965bbbbe48d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -468,8 +468,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } else { struct net *net = sock_net(sk); - if (timer_pending(&t->proto_unreach_timer) && - del_timer(&t->proto_unreach_timer)) + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); sctp_do_sm(net, SCTP_EVENT_T_OTHER, diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 9bcdbd02d77..01dca753db1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1700,10 +1700,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, * address. */ if (!transport->flight_size) { - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) { + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); - } } else if (restart_timer) { if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 5f7518de2fd..ad0dba87034 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -122,12 +122,12 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) { struct sctp_transport *sp; static __u32 lcwnd = 0; @@ -183,13 +183,20 @@ static __init int sctpprobe_init(void) { int ret = -ENOMEM; + /* Warning: if the function signature of sctp_sf_eat_sack_6_2, + * has been changed, you also have to change the signature of + * jsctp_sf_eat_sack, otherwise you end up right here! + */ + BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, + jsctp_sf_eat_sack) == 0); + init_waitqueue_head(&sctpw.wait); spin_lock_init(&sctpw.lock); if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, - &sctpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, + &sctpprobe_fops)) goto free_kfifo; ret = register_jprobe(&sctp_recv_probe); @@ -201,7 +208,7 @@ static __init int sctpprobe_init(void) return 0; remove_proc: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); free_kfifo: kfifo_free(&sctpw.fifo); return ret; @@ -210,7 +217,7 @@ free_kfifo: static __exit void sctpprobe_exit(void) { kfifo_free(&sctpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&sctp_recv_probe); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f898b1c58bd..1c2e46cb919 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -595,7 +595,7 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } -void sctp_addr_wq_timeout_handler(unsigned long arg) +static void sctp_addr_wq_timeout_handler(unsigned long arg) { struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e1c5fc2be6b..a193f3bc814 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1589,8 +1589,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct sctp_signed_cookie *cookie; struct scatterlist sg; int headersize, bodysize; - unsigned int keylen; - char *key; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1650,12 +1648,11 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Sign the message. */ sg_init_one(&sg, &cookie->c, bodysize); - keylen = SCTP_SECRET_SIZE; - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) goto free_cookie; } @@ -1682,8 +1679,7 @@ struct sctp_association *sctp_unpack_cookie( int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; struct scatterlist sg; - unsigned int keylen, len; - char *key; + unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; @@ -1718,34 +1714,21 @@ struct sctp_association *sctp_unpack_cookie( goto no_hmac; /* Check the signature. */ - keylen = SCTP_SECRET_SIZE; sg_init_one(&sg, bear_cookie, bodysize); - key = (char *)ep->secret_key[ep->current_key]; desc.tfm = sctp_sk(ep->base.sk)->hmac; desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || + if (crypto_hash_setkey(desc.tfm, ep->secret_key, + sizeof(ep->secret_key)) || crypto_hash_digest(&desc, &sg, bodysize, digest)) { *error = -SCTP_IERROR_NOMEM; goto fail; } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Try the previous key. */ - key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - if (crypto_hash_setkey(desc.tfm, key, keylen) || - crypto_hash_digest(&desc, &sg, bodysize, digest)) { - *error = -SCTP_IERROR_NOMEM; - goto fail; - } - - if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { - /* Yikes! Still bad signature! */ - *error = -SCTP_IERROR_BAD_SIG; - goto fail; - } + *error = -SCTP_IERROR_BAD_SIG; + goto fail; } no_hmac: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c9577754a70..8aab894aeab 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -674,10 +674,8 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (timer_pending(&t->T3_rtx_timer) && - del_timer(&t->T3_rtx_timer)) { + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } } } @@ -1517,7 +1515,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TIMER_STOP: timer = &asoc->timers[cmd->obj.to]; - if (timer_pending(timer) && del_timer(timer)) + if (del_timer(timer)) sctp_association_put(asoc); break; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4e45bb68aef..fafd2a461ba 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -151,13 +151,11 @@ void sctp_transport_free(struct sctp_transport *transport) * structure hang around in memory since we know * the tranport is going away. */ - if (timer_pending(&transport->T3_rtx_timer) && - del_timer(&transport->T3_rtx_timer)) + if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ - if (timer_pending(&transport->proto_unreach_timer) && - del_timer(&transport->proto_unreach_timer)) + if (del_timer(&transport->proto_unreach_timer)) sctp_association_put(transport->asoc); sctp_transport_put(transport); @@ -168,10 +166,6 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); - if (transport->asoc) - sctp_association_put(transport->asoc); - - sctp_packet_free(&transport->packet); dst_release(transport->dst); kfree(transport); @@ -186,6 +180,11 @@ static void sctp_transport_destroy(struct sctp_transport *transport) SCTP_ASSERT(transport->dead, "Transport is not dead", return); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); + + sctp_packet_free(&transport->packet); + + if (transport->asoc) + sctp_association_put(transport->asoc); } /* Start T3_rtx timer if it is not already running and update the heartbeat @@ -654,10 +653,9 @@ void sctp_transport_reset(struct sctp_transport *t) void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ - if (timer_pending(&t->T3_rtx_timer)) { - (void)del_timer(&t->T3_rtx_timer); + if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); - } + sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) diff --git a/net/socket.c b/net/socket.c index 2ca51c719ef..ee0d029e513 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,7 +69,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> @@ -2838,7 +2837,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) } ifr = compat_alloc_user_space(buf_size); - rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; @@ -2862,12 +2861,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_ext + 1) - - (void *)rxnfc) || + (void __user *)(&rxnfc->fs.m_ext + 1) - + (void __user *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, - (void *)(&rxnfc->fs.location + 1) - - (void *)&rxnfc->fs.ring_cookie) || + (void __user *)(&rxnfc->fs.location + 1) - + (void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; @@ -2879,12 +2878,12 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_ext + 1) - - (const void *)rxnfc) || + (const void __user *)(&rxnfc->fs.m_ext + 1) - + (const void __user *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, - (const void *)(&rxnfc->fs.location + 1) - - (const void *)&rxnfc->fs.ring_cookie) || + (const void __user *)(&rxnfc->fs.location + 1) - + (const void __user *)&rxnfc->fs.ring_cookie) || copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, sizeof(rxnfc->rule_cnt))) return -EFAULT; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 54f89f90ac3..2655c9f4eca 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -774,6 +774,7 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); + spin_lock_init(&bcbearer->bearer.lock); bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b4e4833a48..a9622b6cd91 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -43,7 +43,8 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define OVERLOAD_LIMIT_BASE 10000 +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -129,19 +130,6 @@ static void advance_rx_queue(struct sock *sk) } /** - * discard_rx_queue - discard all buffers in socket receive queue - * - * Caller must hold socket lock - */ -static void discard_rx_queue(struct sock *sk) -{ - struct sk_buff *buf; - - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - kfree_skb(buf); -} - -/** * reject_rx_queue - reject all buffers in socket receive queue * * Caller must hold socket lock @@ -215,7 +203,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; - sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -292,7 +279,7 @@ static int release(struct socket *sock) res = tipc_deleteport(tport->ref); /* Discard any remaining (connection-based) messages in receive queue */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; @@ -516,8 +503,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -625,8 +611,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) return send_msg(iocb, sock, m, total_len); - if ((total_len > TIPC_MAX_USER_MSG_SIZE) || - (m->msg_iovlen > (unsigned int)INT_MAX)) + if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) @@ -711,8 +696,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - if ((total_len > (unsigned int)INT_MAX) || - (m->msg_iovlen > (unsigned int)INT_MAX)) { + if (total_len > (unsigned int)INT_MAX) { res = -EMSGSIZE; goto exit; } @@ -1155,34 +1139,6 @@ static void tipc_data_ready(struct sock *sk, int len) } /** - * rx_queue_full - determine if receive queue can accept another message - * @msg: message to be added to queue - * @queue_size: current size of queue - * @base: nominal maximum size of queue - * - * Returns 1 if queue is unable to accept message, 0 otherwise - */ -static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base) -{ - u32 threshold; - u32 imp = msg_importance(msg); - - if (imp == TIPC_LOW_IMPORTANCE) - threshold = base; - else if (imp == TIPC_MEDIUM_IMPORTANCE) - threshold = base * 2; - else if (imp == TIPC_HIGH_IMPORTANCE) - threshold = base * 100; - else - return 0; - - if (msg_connected(msg)) - threshold *= 4; - - return queue_size >= threshold; -} - -/** * filter_connect - Handle all incoming messages for a connection-based socket * @tsock: TIPC socket * @msg: message @@ -1260,6 +1216,36 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) } /** + * rcvbuf_limit - get proper overload limit of socket receive queue + * @sk: socket + * @buf: message + * + * For all connection oriented messages, irrespective of importance, + * the default overload value (i.e. 67MB) is set as limit. + * + * For all connectionless messages, by default new queue limits are + * as belows: + * + * TIPC_LOW_IMPORTANCE (5MB) + * TIPC_MEDIUM_IMPORTANCE (10MB) + * TIPC_HIGH_IMPORTANCE (20MB) + * TIPC_CRITICAL_IMPORTANCE (40MB) + * + * Returns overload limit according to corresponding message importance + */ +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + unsigned int limit; + + if (msg_connected(msg)) + limit = CONN_OVERLOAD_LIMIT; + else + limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + return limit; +} + +/** * filter_rcv - validate incoming message * @sk: socket * @buf: message @@ -1275,7 +1261,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(buf); - u32 recv_q_len; + unsigned int limit = rcvbuf_limit(sk, buf); u32 res = TIPC_OK; /* Reject message if it is wrong sort of message for socket */ @@ -1292,15 +1278,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /* Reject message if there isn't room to queue it */ - recv_q_len = skb_queue_len(&sk->sk_receive_queue); - if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) { - if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2)) - return TIPC_ERR_OVERLOAD; - } + if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + return TIPC_ERR_OVERLOAD; - /* Enqueue message (finally!) */ + /* Enqueue message */ TIPC_SKB_CB(buf)->handle = 0; __skb_queue_tail(&sk->sk_receive_queue, buf); + skb_set_owner_r(buf, sk); sk->sk_data_ready(sk, 0); return TIPC_OK; @@ -1349,7 +1333,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, sk->sk_rcvbuf)) + if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; @@ -1583,6 +1567,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); + skb_set_owner_r(buf, new_sk); } release_sock(new_sk); @@ -1637,7 +1622,7 @@ restart: case SS_DISCONNECTING: /* Discard any unreceived messages */ - discard_rx_queue(sk); + __skb_queue_purge(&sk->sk_receive_queue); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5b5c876c80e..87d28428901 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2402,7 +2402,7 @@ static int __net_init unix_net_init(struct net *net) goto out; #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) { + if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { unix_sysctl_unregister(net); goto out; } @@ -2415,7 +2415,7 @@ out: static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); - proc_net_remove(net, "unix"); + remove_proc_entry("unix", net->proc_net); } static struct pernet_operations unix_net_ops = { @@ -2426,9 +2426,8 @@ static struct pernet_operations unix_net_ops = { static int __init af_unix_init(void) { int rc = -1; - struct sk_buff *dummy_skb; - BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); + BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 00000000000..b5fa7e40cdc --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 00000000000..2ce52d70f22 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 00000000000..ca511c4f388 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2012 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. + */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol && protocol != PF_VSOCK) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION("1.0.0.0-k"); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 00000000000..7d64d3609ec --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/vm_sockets.h> + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outsided of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. */ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. */ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 00000000000..a70ace83a15 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2155 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/smp.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <net/sock.h> + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? -err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. + */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. + */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. + */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will handle + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). + */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. + */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. (%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 00000000000..1bf991803ec --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. + */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 00000000000..9a730744e7b --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. + */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. */ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. + */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 00000000000..7df793249b6 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include <linux/types.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/vm_sockets.h> + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 00000000000..622bd7aa101 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 00000000000..b7df1aea7c5 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/stddef.h> +#include <net/sock.h> + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 00000000000..cdfbcefdf84 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include <linux/vm_sockets.h> + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e1..00000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. - With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from <ftp://ftp.sangoma.com/>. - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc4807..00000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767..00000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7..00000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/capability.h> -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> /* support for loadable modules */ -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ - -#include <asm/byteorder.h> /* htons(), etc. */ -#include <linux/wanrouter.h> /* WAN router API definitions */ - -#include <linux/vmalloc.h> /* vmalloc, vfree */ -#include <asm/uaccess.h> /* copy_to/from_user */ -#include <linux/init.h> /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. - * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? */ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. - * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96b..00000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include <linux/init.h> /* __initfunc et al. */ -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/errno.h> /* return codes */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/wanrouter.h> /* WAN router API definitions */ -#include <linux/seq_file.h> -#include <linux/mutex.h> - -#include <net/net_namespace.h> -#include <asm/io.h> - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * <device> entry for each WAN device - */ - -/* - * Generic /proc/net/router/<file> file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? "internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 324e8d851dc..a4a14e8f55c 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -46,3 +46,65 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } + +void cfg80211_ch_switch_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ch_switch_notify(dev, chandef); + + wdev_lock(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + goto out; + + wdev->channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: + wdev_unlock(wdev); + return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_spurious_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); + +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; +} +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a7990bb1652..fd556ac05fd 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -76,6 +76,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; if (!chandef->center_freq2) return false; + /* adjacent is not allowed -- that's a 160 MHz channel */ + if (chandef->center_freq1 - chandef->center_freq2 == 80 || + chandef->center_freq2 - chandef->center_freq1 == 80) + return false; break; case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 != control_freq + 30 && @@ -143,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -188,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -199,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -249,6 +375,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_80: if (!vht_cap->vht_supported) return false; + prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; break; case NL80211_CHAN_WIDTH_160: @@ -256,6 +383,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) return false; + prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; break; default: @@ -263,7 +391,16 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; } - /* TODO: missing regulatory check on 80/160 bandwidth */ + /* + * TODO: What if there are only certain 80/160/80+80 MHz channels + * allowed by the driver, or only certain combinations? + * For 40 MHz the driver can set the NO_HT40 flags, but for + * 80/160 MHz and in particular 80+80 MHz this isn't really + * feasible and we only have NO_80MHZ/NO_160MHZ so far but + * no way to cover 80+80 MHz or more complex restrictions. + * Note that such restrictions also need to be advertised to + * userspace, for example for P2P channel selection. + */ if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; @@ -340,7 +477,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c b/net/wireless/core.c index b677eab55b6..5ffff039b01 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -57,9 +57,6 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { @@ -74,10 +71,8 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev; - if (!wiphy) - return WIPHY_IDX_STALE; - rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + return rdev->wiphy_idx; } @@ -86,9 +81,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - if (!wiphy_idx_valid(wiphy_idx)) - return NULL; - assert_cfg80211_lock(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); @@ -309,7 +301,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy_idx = wiphy_counter++; - if (unlikely(!wiphy_idx_valid(rdev->wiphy_idx))) { + if (unlikely(rdev->wiphy_idx < 0)) { wiphy_counter--; mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ @@ -332,6 +324,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -373,7 +367,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH | + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; return &rdev->wiphy; } @@ -390,8 +385,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -406,6 +404,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. */ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; @@ -478,6 +481,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); @@ -690,6 +698,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); @@ -710,7 +719,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) kfree(reg); } list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) - cfg80211_put_bss(&scan->pub); + cfg80211_put_bss(&rdev->wiphy, &scan->pub); kfree(rdev); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 3563097169c..3aec0e429d8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -8,7 +8,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/netdevice.h> -#include <linux/kref.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include <linux/rfkill.h> @@ -18,6 +17,9 @@ #include <net/cfg80211.h> #include "reg.h" + +#define WIPHY_IDX_INVALID -1 + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -84,9 +86,11 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ - struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); + struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline @@ -96,13 +100,6 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) return container_of(wiphy, struct cfg80211_registered_device, wiphy); } -/* Note 0 is valid, hence phy0 */ -static inline -bool wiphy_idx_valid(int wiphy_idx) -{ - return wiphy_idx >= 0; -} - static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { @@ -113,6 +110,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } @@ -126,17 +126,12 @@ static inline void assert_cfg80211_lock(void) lockdep_assert_held(&cfg80211_mutex); } -/* - * You can use this to mark a wiphy_idx as not having an associated wiphy. - * It guarantees cfg80211_rdev_by_wiphy_idx(wiphy_idx) will return NULL - */ -#define WIPHY_IDX_STALE -1 - struct cfg80211_internal_bss { struct list_head list; + struct list_head hidden_list; struct rb_node rbn; unsigned long ts; - struct kref ref; + unsigned long refcount; atomic_t hold; /* must be last because of priv member */ @@ -435,7 +430,24 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); + +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void cfg80211_dfs_channels_update_work(struct work_struct *work); + static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -443,7 +455,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -460,7 +472,17 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); +} + +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); } void diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index 48c48ffafa1..e37862f1b12 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -15,10 +15,10 @@ static void cfg80211_get_drvinfo(struct net_device *dev, strlcpy(info->version, init_utsname()->release, sizeof(info->version)); if (wdev->wiphy->fw_version[0]) - strncpy(info->fw_version, wdev->wiphy->fw_version, + strlcpy(info->fw_version, wdev->wiphy->fw_version, sizeof(info->fw_version)); else - strncpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9b9551e4a6f..d80e47194d4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -37,7 +37,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); @@ -182,7 +182,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index f9d6ce5cfab..55957a284f6 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -44,6 +44,10 @@ #define MESH_SYNC_NEIGHBOR_OFFSET_MAX 50 +#define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ +#define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ + const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, .dot11MeshConfirmTimeout = MESH_CONF_T, @@ -69,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { @@ -79,6 +85,8 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, + .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5e8123ee63f..caddca35d68 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -58,7 +58,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, */ if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -70,7 +70,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * do not call connect_result() now because the * sme will schedule work that does it later. */ - cfg80211_put_bss(bss); + cfg80211_put_bss(wiphy, bss); goto out; } @@ -108,7 +108,7 @@ void __cfg80211_send_deauth(struct net_device *dev, if (wdev->current_bss && ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; was_current = true; } @@ -164,7 +164,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_sme_disassoc(dev, wdev->current_bss); cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } else WARN_ON(1); @@ -324,7 +324,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, err = rdev_auth(rdev, dev, &req); out: - cfg80211_put_bss(req.bss); + cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -432,7 +432,7 @@ out: if (err) { if (was_connected) wdev->sme_state = CFG80211_SME_CONNECTED; - cfg80211_put_bss(req.bss); + cfg80211_put_bss(&rdev->wiphy, req.bss); } return err; @@ -514,7 +514,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_CONNECTED) return -ENOTCONN; - if (WARN_ON(!wdev->current_bss)) + if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; memset(&req, 0, sizeof(req)); @@ -572,7 +572,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } } @@ -988,64 +988,122 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); -void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) +void cfg80211_dfs_channels_update_work(struct work_struct *work) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; - trace_cfg80211_ch_switch_notify(dev, chandef); + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; - wdev_lock(wdev); + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) - goto out; + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; - wdev->channel = chandef->chan; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); + + nl80211_radar_notify(rdev, &chandef, + NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } + + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); + + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); } -EXPORT_SYMBOL(cfg80211_ch_switch_notify); -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) + +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; - trace_cfg80211_rx_spurious_frame(dev, addr); + trace_cfg80211_radar_event(wiphy, chandef); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. + */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); } -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); +EXPORT_SYMBOL(cfg80211_radar_event); -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; - trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + trace_cfg80211_cac_event(netdev, event); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { - trace_cfg80211_return_bool(false); - return false; + if (WARN_ON(!wdev->cac_started)) + return; + + if (WARN_ON(!wdev->channel)) + return; + + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + WARN_ON(1); + return; } - ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; + wdev->cac_started = false; + + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); } -EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); +EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f45706adaf3..35545ccc30f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19,6 +19,7 @@ #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/sock.h> +#include <net/inet_connection_sock.h> #include "core.h" #include "nl80211.h" #include "reg.h" @@ -365,6 +366,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, + [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, }; /* policy for the key attributes */ @@ -397,6 +402,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { + [NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN }, + [NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_seq) + }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_token) + }, + [NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; /* policy for GTK rekey offload attributes */ @@ -529,8 +554,27 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + if (chan->flags & IEEE80211_CHAN_RADAR) { + u32 time = elapsed_jiffies_msecs(chan->dfs_state_entered); + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) + goto nla_put_failure; + } + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -856,6 +900,9 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -867,6 +914,48 @@ nla_put_failure: return -ENOBUFS; } +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; + + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; + + nla_nest_end(msg, nl_tcp); + return 0; +} +#endif + static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1233,12 +1322,17 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.wowlan.pattern_min_len, .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = + dev->wiphy.wowlan.max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) goto nla_put_failure; } + if (nl80211_send_wowlan_tcp_caps(dev, msg)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } #endif @@ -1265,6 +1359,21 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2079,6 +2188,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; + if (type == NL80211_IFTYPE_P2P_DEVICE && info->attrs[NL80211_ATTR_MAC]) { + nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], + ETH_ALEN); + if (!is_valid_ether_addr(params.macaddr)) + return -EADDRNOTAVAIL; + } + if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -2481,6 +2597,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. + */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2598,6 +2805,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2716,14 +2924,30 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2732,6 +2956,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -2939,12 +3166,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes)) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && @@ -3001,6 +3238,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3160,6 +3409,54 @@ static struct net_device *get_vlan(struct genl_info *info, return ERR_PTR(ret); } +static struct nla_policy +nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { + [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, + [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, +}; + +static int nl80211_set_station_tdls(struct genl_info *info, + struct station_parameters *params) +{ + struct nlattr *tb[NL80211_STA_WME_MAX + 1]; + struct nlattr *nla; + int err; + + /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params->ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params->vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + + /* parse WME attributes if present */ + if (!info->attrs[NL80211_ATTR_STA_WME]) + return 0; + + nla = info->attrs[NL80211_ATTR_STA_WME]; + err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy); + if (err) + return err; + + if (tb[NL80211_STA_WME_UAPSD_QUEUES]) + params->uapsd_queues = nla_get_u8( + tb[NL80211_STA_WME_UAPSD_QUEUES]); + if (params->uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) + return -EINVAL; + + if (tb[NL80211_STA_WME_MAX_SP]) + params->max_sp = nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); + + if (params->max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) + return -EINVAL; + + params->sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; + + return 0; +} + static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3188,13 +3485,21 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } - if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } - if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) - params.ht_capa = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + return -EINVAL; if (!rdev->ops->change_station) return -EOPNOTSUPP; @@ -3210,6 +3515,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3217,6 +3533,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3231,11 +3549,32 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + + /* reject other things that can't change */ + if (params.supported_rates) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3250,14 +3589,28 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) * to change the flag. */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* fall through */ + /* Include parameters for TDLS peer (driver will check) */ + err = nl80211_set_station_tdls(info, ¶ms); + if (err) + return err; + /* disallow things sta doesn't support */ + if (params.plink_action) + return -EINVAL; + if (params.local_pm) + return -EINVAL; + /* reject any changes other than AUTHORIZED or WME (for TDLS) */ + if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + break; case NL80211_IFTYPE_ADHOC: /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; - if (params.ht_capa) + if (params.local_pm) return -EINVAL; - if (params.listen_interval >= 0) + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) @@ -3267,9 +3620,14 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things mesh doesn't support */ if (params.vlan) return -EINVAL; - if (params.ht_capa) + if (params.supported_rates) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) return -EINVAL; - if (params.listen_interval >= 0) + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY]) return -EINVAL; /* * No special handling for TDLS here -- the userspace @@ -3295,12 +3653,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return err; } -static struct nla_policy -nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { - [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, - [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, -}; - static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3335,6 +3687,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -3393,17 +3758,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; @@ -3787,12 +4166,8 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) * window between nl80211_init() and regulatory_init(), if that is * even possible. */ - mutex_lock(&cfg80211_mutex); - if (unlikely(!cfg80211_regdomain)) { - mutex_unlock(&cfg80211_mutex); + if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - } - mutex_unlock(&cfg80211_mutex); if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) return -EINVAL; @@ -3908,7 +4283,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3947,6 +4326,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3967,13 +4348,15 @@ static int nl80211_parse_mesh_config(struct genl_info *info, struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; -#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ -do {\ - if (table[attr_num]) {\ - cfg->param = nla_fn(table[attr_num]); \ - mask |= (1 << (attr_num - 1)); \ - } \ -} while (0);\ +#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ +do { \ + if (tb[attr]) { \ + if (fn(tb[attr]) < min || fn(tb[attr]) > max) \ + return -EINVAL; \ + cfg->param = fn(tb[attr]); \ + mask |= (1 << (attr - 1)); \ + } \ +} while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) @@ -3988,83 +4371,98 @@ do {\ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255, mask, NL80211_MESHCONF_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, + 1, 255, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + 1, 65535, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + 1, 65535, mask, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, mask, + dot11MeshHWMPnetDiameterTraversalTime, + 1, 65535, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4, + mask, NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535, + mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + dot11MeshGateAnnouncementProtocol, 0, 1, + mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, - mask, + 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPconfirmationInterval, mask, + dot11MeshHWMPconfirmationInterval, + 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; @@ -4152,6 +4550,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) { + const struct ieee80211_regdomain *regdom; struct sk_buff *msg; void *hdr = NULL; struct nlattr *nl_reg_rules; @@ -4174,35 +4573,36 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto put_failure; - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, - cfg80211_regdomain->alpha2) || - (cfg80211_regdomain->dfs_region && - nla_put_u8(msg, NL80211_ATTR_DFS_REGION, - cfg80211_regdomain->dfs_region))) - goto nla_put_failure; - if (reg_last_request_cell_base() && nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, NL80211_USER_REG_HINT_CELL_BASE)) goto nla_put_failure; + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + + if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || + (regdom->dfs_region && + nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) + goto nla_put_failure_rcu; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure; + goto nla_put_failure_rcu; - for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; const struct ieee80211_reg_rule *reg_rule; const struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule; - reg_rule = &cfg80211_regdomain->reg_rules[i]; + reg_rule = ®dom->reg_rules[i]; freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS, reg_rule->flags) || @@ -4216,10 +4616,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, power_rule->max_eirp)) - goto nla_put_failure; + goto nla_put_failure_rcu; nla_nest_end(msg, nl_reg_rule); } + rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); @@ -4227,6 +4628,8 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) err = genlmsg_reply(msg, info); goto out; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: @@ -4259,27 +4662,18 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { num_rules++; if (num_rules > NL80211_MAX_SUPP_REG_RULES) return -EINVAL; } - mutex_lock(&cfg80211_mutex); - - if (!reg_is_valid_request(alpha2)) { - r = -EINVAL; - goto bad_reg; - } - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) { - r = -ENOMEM; - goto bad_reg; - } + if (!rd) + return -ENOMEM; rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; @@ -4293,10 +4687,10 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) rd->dfs_region = dfs_region; nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], - rem_reg_rules) { + rem_reg_rules) { nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, - nla_data(nl_reg_rule), nla_len(nl_reg_rule), - reg_rule_policy); + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); if (r) goto bad_reg; @@ -4309,16 +4703,14 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - BUG_ON(rule_idx != num_rules); + mutex_lock(&cfg80211_mutex); r = set_regdom(rd); - + /* set_regdom took ownership */ + rd = NULL; mutex_unlock(&cfg80211_mutex); - return r; - bad_reg: - mutex_unlock(&cfg80211_mutex); kfree(rd); return r; } @@ -4801,6 +5193,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -4811,6 +5251,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, const struct cfg80211_bss_ies *ies; void *hdr; struct nlattr *bss; + bool tsf = false; ASSERT_WDEV_LOCK(wdev); @@ -4834,22 +5275,24 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, rcu_read_lock(); ies = rcu_dereference(res->ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + tsf = true; + if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, + ies->len, ies->data)) + goto fail_unlock_rcu; } ies = rcu_dereference(res->beacon_ies); - if (ies && ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, - ies->len, ies->data)) { - rcu_read_unlock(); - goto nla_put_failure; + if (ies) { + if (!tsf && nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf)) + goto fail_unlock_rcu; + if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, + ies->len, ies->data)) + goto fail_unlock_rcu; } rcu_read_unlock(); - if (res->tsf && - nla_put_u64(msg, NL80211_BSS_TSF, res->tsf)) - goto nla_put_failure; if (res->beacon_interval && nla_put_u16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval)) goto nla_put_failure; @@ -4894,6 +5337,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, return genlmsg_end(msg, hdr); + fail_unlock_rcu: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -5867,6 +6312,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, @@ -6652,6 +7106,21 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) return -EINVAL; + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + setup.beacon_interval = + nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); + if (setup.beacon_interval < 10 || + setup.beacon_interval > 10000) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { + setup.dtim_period = + nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); + if (setup.dtim_period < 1 || setup.dtim_period > 100) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); @@ -6680,16 +7149,100 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6720,31 +7273,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct nlattr *nl_pats, *nl_pat; - int i, pat_len; - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -6757,6 +7291,150 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size = 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return -EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6767,7 +7445,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -6831,7 +7510,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; - int rem, pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -6866,6 +7545,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { @@ -6885,6 +7573,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -6902,6 +7598,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -7784,6 +8483,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -8051,7 +8766,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (wiphy_idx_valid(request->wiphy_idx) && + if (request->wiphy_idx != WIPHY_IDX_INVALID && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; @@ -8981,6 +9696,57 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, } void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, u32 num_packets, gfp_t gfp) @@ -9115,6 +9881,114 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba8477e9..b061da4919e 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -108,6 +108,13 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); + +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp); + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f83..422d38291d6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 82c4fc7c994..98532c00242 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -48,7 +48,6 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/random.h> #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> @@ -66,6 +65,13 @@ #define REG_DBG_PRINT(args...) #endif +enum reg_request_treatment { + REG_REQ_OK, + REG_REQ_IGNORE, + REG_REQ_INTERSECT, + REG_REQ_ALREADY_SET, +}; + static struct regulatory_request core_request_world = { .initiator = NL80211_REGDOM_SET_BY_CORE, .alpha2[0] = '0', @@ -76,7 +82,8 @@ static struct regulatory_request core_request_world = { }; /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request = &core_request_world; +static struct regulatory_request __rcu *last_request = + (void __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -88,16 +95,16 @@ static struct device_type reg_device_type = { /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 + * information to give us an alpha2. */ -const struct ieee80211_regdomain *cfg80211_regdomain; +const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* * Protects static reg.c components: - * - cfg80211_world_regdom - * - cfg80211_regdom - * - last_request - * - reg_num_devs_support_basehint + * - cfg80211_regdomain (if not used with RCU) + * - cfg80211_world_regdom + * - last_request (if not used with RCU) + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); @@ -112,6 +119,31 @@ static inline void assert_reg_lock(void) lockdep_assert_held(®_mutex); } +static const struct ieee80211_regdomain *get_cfg80211_regdom(void) +{ + return rcu_dereference_protected(cfg80211_regdomain, + lockdep_is_held(®_mutex)); +} + +static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +{ + return rcu_dereference_protected(wiphy->regd, + lockdep_is_held(®_mutex)); +} + +static void rcu_free_regdom(const struct ieee80211_regdomain *r) +{ + if (!r) + return; + kfree_rcu((struct ieee80211_regdomain *)r, rcu_head); +} + +static struct regulatory_request *get_last_request(void) +{ + return rcu_dereference_check(last_request, + lockdep_is_held(®_mutex)); +} + /* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); static spinlock_t reg_requests_lock; @@ -177,28 +209,37 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(bool full_reset) +static void reset_regdomains(bool full_reset, + const struct ieee80211_regdomain *new_regdom) { + const struct ieee80211_regdomain *r; + struct regulatory_request *lr; + + assert_reg_lock(); + + r = get_cfg80211_regdom(); + /* avoid freeing static information or freeing something twice */ - if (cfg80211_regdomain == cfg80211_world_regdom) - cfg80211_regdomain = NULL; + if (r == cfg80211_world_regdom) + r = NULL; if (cfg80211_world_regdom == &world_regdom) cfg80211_world_regdom = NULL; - if (cfg80211_regdomain == &world_regdom) - cfg80211_regdomain = NULL; + if (r == &world_regdom) + r = NULL; - kfree(cfg80211_regdomain); - kfree(cfg80211_world_regdom); + rcu_free_regdom(r); + rcu_free_regdom(cfg80211_world_regdom); cfg80211_world_regdom = &world_regdom; - cfg80211_regdomain = NULL; + rcu_assign_pointer(cfg80211_regdomain, new_regdom); if (!full_reset) return; - if (last_request != &core_request_world) - kfree(last_request); - last_request = &core_request_world; + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); + rcu_assign_pointer(last_request, &core_request_world); } /* @@ -207,30 +248,29 @@ static void reset_regdomains(bool full_reset) */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { - BUG_ON(!last_request); + struct regulatory_request *lr; + + lr = get_last_request(); + + WARN_ON(!lr); - reset_regdomains(false); + reset_regdomains(false, rd); cfg80211_world_regdom = rd; - cfg80211_regdomain = rd; } bool is_world_regdom(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] == '0' && alpha2[1] == '0') - return true; - return false; + return alpha2[0] == '0' && alpha2[1] == '0'; } static bool is_alpha2_set(const char *alpha2) { if (!alpha2) return false; - if (alpha2[0] != 0 && alpha2[1] != 0) - return true; - return false; + return alpha2[0] && alpha2[1]; } static bool is_unknown_alpha2(const char *alpha2) @@ -241,9 +281,7 @@ static bool is_unknown_alpha2(const char *alpha2) * Special case where regulatory domain was built by driver * but a specific alpha2 cannot be determined */ - if (alpha2[0] == '9' && alpha2[1] == '9') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '9'; } static bool is_intersected_alpha2(const char *alpha2) @@ -255,39 +293,30 @@ static bool is_intersected_alpha2(const char *alpha2) * result of an intersection between two regulatory domain * structures */ - if (alpha2[0] == '9' && alpha2[1] == '8') - return true; - return false; + return alpha2[0] == '9' && alpha2[1] == '8'; } static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - if (isalpha(alpha2[0]) && isalpha(alpha2[1])) - return true; - return false; + return isalpha(alpha2[0]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) { if (!alpha2_x || !alpha2_y) return false; - if (alpha2_x[0] == alpha2_y[0] && - alpha2_x[1] == alpha2_y[1]) - return true; - return false; + return alpha2_x[0] == alpha2_y[0] && alpha2_x[1] == alpha2_y[1]; } static bool regdom_changes(const char *alpha2) { - assert_cfg80211_lock(); + const struct ieee80211_regdomain *r = get_cfg80211_regdom(); - if (!cfg80211_regdomain) + if (!r) return true; - if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) - return false; - return true; + return !alpha2_equal(r->alpha2, alpha2); } /* @@ -301,38 +330,36 @@ static bool is_user_regdom_saved(void) return false; /* This would indicate a mistake on the design */ - if (WARN((!is_world_regdom(user_alpha2) && - !is_an_alpha2(user_alpha2)), + if (WARN(!is_world_regdom(user_alpha2) && !is_an_alpha2(user_alpha2), "Unexpected user alpha2: %c%c\n", - user_alpha2[0], - user_alpha2[1])) + user_alpha2[0], user_alpha2[1])) return false; return true; } -static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, - const struct ieee80211_regdomain *src_regd) +static const struct ieee80211_regdomain * +reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd = 0; + int size_of_regd; unsigned int i; - size_of_regd = sizeof(struct ieee80211_regdomain) + - ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + size_of_regd = + sizeof(struct ieee80211_regdomain) + + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); for (i = 0; i < src_regd->n_reg_rules; i++) memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], - sizeof(struct ieee80211_reg_rule)); + sizeof(struct ieee80211_reg_rule)); - *dst_regd = regd; - return 0; + return regd; } #ifdef CONFIG_CFG80211_INTERNAL_REGDB @@ -347,9 +374,8 @@ static DEFINE_MUTEX(reg_regdb_search_mutex); static void reg_regdb_search(struct work_struct *work) { struct reg_regdb_search_request *request; - const struct ieee80211_regdomain *curdom, *regdom; - int i, r; - bool set_reg = false; + const struct ieee80211_regdomain *curdom, *regdom = NULL; + int i; mutex_lock(&cfg80211_mutex); @@ -360,14 +386,11 @@ static void reg_regdb_search(struct work_struct *work) list); list_del(&request->list); - for (i=0; i<reg_regdb_size; i++) { + for (i = 0; i < reg_regdb_size; i++) { curdom = reg_regdb[i]; - if (!memcmp(request->alpha2, curdom->alpha2, 2)) { - r = reg_copy_regd(®dom, curdom); - if (r) - break; - set_reg = true; + if (alpha2_equal(request->alpha2, curdom->alpha2)) { + regdom = reg_copy_regd(curdom); break; } } @@ -376,7 +399,7 @@ static void reg_regdb_search(struct work_struct *work) } mutex_unlock(®_regdb_search_mutex); - if (set_reg) + if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); mutex_unlock(&cfg80211_mutex); @@ -434,15 +457,14 @@ static int call_crda(const char *alpha2) return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); } -/* Used by nl80211 before kmalloc'ing our regulatory domain */ -bool reg_is_valid_request(const char *alpha2) +static bool reg_is_valid_request(const char *alpha2) { - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); - if (!last_request) + if (!lr || lr->processed) return false; - return alpha2_equal(last_request->alpha2, alpha2); + return alpha2_equal(lr->alpha2, alpha2); } /* Sanity check on a regulatory rule */ @@ -460,7 +482,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->end_freq_khz <= freq_range->start_freq_khz || - freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -487,8 +509,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) } static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, - u32 center_freq_khz, - u32 bw_khz) + u32 center_freq_khz, u32 bw_khz) { u32 start_freq_khz, end_freq_khz; @@ -518,7 +539,7 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range, * regulatory rule support for other "bands". **/ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, - u32 freq_khz) + u32 freq_khz) { #define ONE_GHZ_IN_KHZ 1000000 /* @@ -540,10 +561,9 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ -static int reg_rules_intersect( - const struct ieee80211_reg_rule *rule1, - const struct ieee80211_reg_rule *rule2, - struct ieee80211_reg_rule *intersected_rule) +static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) { const struct ieee80211_freq_range *freq_range1, *freq_range2; struct ieee80211_freq_range *freq_range; @@ -560,11 +580,11 @@ static int reg_rules_intersect( power_rule = &intersected_rule->power_rule; freq_range->start_freq_khz = max(freq_range1->start_freq_khz, - freq_range2->start_freq_khz); + freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, - freq_range2->end_freq_khz); + freq_range2->end_freq_khz); freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, - freq_range2->max_bandwidth_khz); + freq_range2->max_bandwidth_khz); freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->max_bandwidth_khz > freq_diff) @@ -575,7 +595,7 @@ static int reg_rules_intersect( power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, power_rule2->max_antenna_gain); - intersected_rule->flags = (rule1->flags | rule2->flags); + intersected_rule->flags = rule1->flags | rule2->flags; if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -596,9 +616,9 @@ static int reg_rules_intersect( * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. */ -static struct ieee80211_regdomain *regdom_intersect( - const struct ieee80211_regdomain *rd1, - const struct ieee80211_regdomain *rd2) +static struct ieee80211_regdomain * +regdom_intersect(const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) { int r, size_of_regd; unsigned int x, y; @@ -607,12 +627,7 @@ static struct ieee80211_regdomain *regdom_intersect( struct ieee80211_reg_rule *intersected_rule; struct ieee80211_regdomain *rd; /* This is just a dummy holder to help us count */ - struct ieee80211_reg_rule irule; - - /* Uses the stack temporarily for counter arithmetic */ - intersected_rule = &irule; - - memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + struct ieee80211_reg_rule dummy_rule; if (!rd1 || !rd2) return NULL; @@ -629,11 +644,8 @@ static struct ieee80211_regdomain *regdom_intersect( rule1 = &rd1->reg_rules[x]; for (y = 0; y < rd2->n_reg_rules; y++) { rule2 = &rd2->reg_rules[y]; - if (!reg_rules_intersect(rule1, rule2, - intersected_rule)) + if (!reg_rules_intersect(rule1, rule2, &dummy_rule)) num_rules++; - memset(intersected_rule, 0, - sizeof(struct ieee80211_reg_rule)); } } @@ -641,15 +653,15 @@ static struct ieee80211_regdomain *regdom_intersect( return NULL; size_of_regd = sizeof(struct ieee80211_regdomain) + - ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + num_rules * sizeof(struct ieee80211_reg_rule); rd = kzalloc(size_of_regd, GFP_KERNEL); if (!rd) return NULL; - for (x = 0; x < rd1->n_reg_rules; x++) { + for (x = 0; x < rd1->n_reg_rules && rule_idx < num_rules; x++) { rule1 = &rd1->reg_rules[x]; - for (y = 0; y < rd2->n_reg_rules; y++) { + for (y = 0; y < rd2->n_reg_rules && rule_idx < num_rules; y++) { rule2 = &rd2->reg_rules[y]; /* * This time around instead of using the stack lets @@ -657,8 +669,7 @@ static struct ieee80211_regdomain *regdom_intersect( * a memcpy() */ intersected_rule = &rd->reg_rules[rule_idx]; - r = reg_rules_intersect(rule1, rule2, - intersected_rule); + r = reg_rules_intersect(rule1, rule2, intersected_rule); /* * No need to memset here the intersected rule here as * we're not using the stack anymore @@ -699,34 +710,16 @@ static u32 map_regdom_flags(u32 rd_flags) return channel_flags; } -static int freq_reg_info_regd(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule, - const struct ieee80211_regdomain *custom_regd) +static const struct ieee80211_reg_rule * +freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, + const struct ieee80211_regdomain *regd) { int i; bool band_rule_found = false; - const struct ieee80211_regdomain *regd; bool bw_fits = false; - if (!desired_bw_khz) - desired_bw_khz = MHZ_TO_KHZ(20); - - regd = custom_regd ? custom_regd : cfg80211_regdomain; - - /* - * Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further - */ - if (!custom_regd && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - last_request->initiator != NL80211_REGDOM_SET_BY_USER && - wiphy->regd) - regd = wiphy->regd; - if (!regd) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < regd->n_reg_rules; i++) { const struct ieee80211_reg_rule *rr; @@ -743,33 +736,36 @@ static int freq_reg_info_regd(struct wiphy *wiphy, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, - center_freq, - desired_bw_khz); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); - if (band_rule_found && bw_fits) { - *reg_rule = rr; - return 0; - } + if (band_rule_found && bw_fits) + return rr; } if (!band_rule_found) - return -ERANGE; + return ERR_PTR(-ERANGE); - return -EINVAL; + return ERR_PTR(-EINVAL); } -int freq_reg_info(struct wiphy *wiphy, - u32 center_freq, - u32 desired_bw_khz, - const struct ieee80211_reg_rule **reg_rule) +const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, + u32 center_freq) { - assert_cfg80211_lock(); - return freq_reg_info_regd(wiphy, - center_freq, - desired_bw_khz, - reg_rule, - NULL); + const struct ieee80211_regdomain *regd; + struct regulatory_request *lr = get_last_request(); + + /* + * Follow the driver's regulatory domain, if present, unless a country + * IE has been processed or a user wants to help complaince further + */ + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->initiator != NL80211_REGDOM_SET_BY_USER && + wiphy->regd) + regd = get_wiphy_regdom(wiphy); + else + regd = get_cfg80211_regdom(); + + return freq_reg_info_regd(wiphy, center_freq, regd); } EXPORT_SYMBOL(freq_reg_info); @@ -792,7 +788,6 @@ static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) } static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { const struct ieee80211_power_rule *power_rule; @@ -807,21 +802,16 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, else snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); - REG_DBG_PRINT("Updating information on frequency %d MHz " - "for a %d MHz width channel with regulatory rule:\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", + chan->center_freq); REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", - freq_range->start_freq_khz, - freq_range->end_freq_khz, - freq_range->max_bandwidth_khz, - max_antenna_gain, + freq_range->start_freq_khz, freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, max_antenna_gain, power_rule->max_eirp); } #else static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, - u32 desired_bw_khz, const struct ieee80211_reg_rule *reg_rule) { return; @@ -831,43 +821,25 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). To support - * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a - * new ieee80211_channel.target_bw and re run the regulatory check - * on the wiphy with the target_bw specified. Then we can simply use - * that below for the desired_bw_khz below. + * per channel, the primary and the extension channel). */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *chan) { - int r; u32 flags, bw_flags = 0; - u32 desired_bw_khz = MHZ_TO_KHZ(20); const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; struct wiphy *request_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - assert_cfg80211_lock(); - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); flags = chan->orig_flags; - r = freq_reg_info(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - ®_rule); - - if (r) { + reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); + if (IS_ERR(reg_rule)) { /* * We will disable all channels that do not match our * received regulatory rule unless the hint is coming @@ -879,7 +851,7 @@ static void handle_channel(struct wiphy *wiphy, * while 5 GHz is still supported. */ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - r == -ERANGE) + PTR_ERR(reg_rule) == -ERANGE) return; REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); @@ -887,15 +859,19 @@ static void handle_channel(struct wiphy *wiphy, return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* @@ -912,10 +888,14 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); - chan->max_antenna_gain = min(chan->orig_mag, - (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_antenna_gain = + min_t(int, chan->orig_mag, + MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* @@ -935,68 +915,65 @@ static void handle_channel(struct wiphy *wiphy, } static void handle_band(struct wiphy *wiphy, - enum ieee80211_band band, - enum nl80211_reg_initiator initiator) + enum nl80211_reg_initiator initiator, + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, initiator, band, i); + handle_channel(wiphy, initiator, &sband->channels[i]); } static bool reg_request_cell_base(struct regulatory_request *request) { if (request->initiator != NL80211_REGDOM_SET_BY_USER) return false; - if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) - return false; - return true; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } bool reg_last_request_cell_base(void) { bool val; - assert_cfg80211_lock(); mutex_lock(®_mutex); - val = reg_request_cell_base(last_request); + val = reg_request_cell_base(get_last_request()); mutex_unlock(®_mutex); + return val; } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS - /* Core specific check */ -static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +static enum reg_request_treatment +reg_ignore_cell_hint(struct regulatory_request *pending_request) { + struct regulatory_request *lr = get_last_request(); + if (!reg_num_devs_support_basehint) - return -EOPNOTSUPP; + return REG_REQ_IGNORE; - if (reg_request_cell_base(last_request)) { - if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; - return 0; - } - return 0; + if (reg_request_cell_base(lr) && + !regdom_changes(pending_request->alpha2)) + return REG_REQ_ALREADY_SET; + + return REG_REQ_OK; } /* Device specific check */ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { - if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) - return true; - return false; + return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); } #else static int reg_ignore_cell_hint(struct regulatory_request *pending_request) { - return -EOPNOTSUPP; + return REG_REQ_IGNORE; } -static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) + +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) { return true; } @@ -1006,18 +983,17 @@ static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) { - REG_DBG_PRINT("Ignoring regulatory request %s since " - "last_request is not set\n", + struct regulatory_request *lr = get_last_request(); + + if (!lr) { + REG_DBG_PRINT("Ignoring regulatory request %s since last_request is not set\n", reg_initiator_name(initiator)); return true; } if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver uses its own custom " - "regulatory domain\n", + REG_DBG_PRINT("Ignoring regulatory request %s since the driver uses its own custom regulatory domain\n", reg_initiator_name(initiator)); return true; } @@ -1028,22 +1004,35 @@ static bool ignore_reg_update(struct wiphy *wiphy, */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - !is_world_regdom(last_request->alpha2)) { - REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver requires its own regulatory " - "domain to be set first\n", + !is_world_regdom(lr->alpha2)) { + REG_DBG_PRINT("Ignoring regulatory request %s since the driver requires its own regulatory domain to be set first\n", reg_initiator_name(initiator)); return true; } - if (reg_request_cell_base(last_request)) + if (reg_request_cell_base(lr)) return reg_dev_ignore_cell_hint(wiphy); return false; } -static void handle_reg_beacon(struct wiphy *wiphy, - unsigned int chan_idx, +static bool reg_is_world_roaming(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *cr = get_cfg80211_regdom(); + const struct ieee80211_regdomain *wr = get_wiphy_regdom(wiphy); + struct regulatory_request *lr = get_last_request(); + + if (is_world_regdom(cr->alpha2) || (wr && is_world_regdom(wr->alpha2))) + return true; + + if (lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + return true; + + return false; +} + +static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { struct ieee80211_supported_band *sband; @@ -1051,8 +1040,6 @@ static void handle_reg_beacon(struct wiphy *wiphy, bool channel_changed = false; struct ieee80211_channel chan_before; - assert_cfg80211_lock(); - sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -1064,6 +1051,9 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (!reg_is_world_roaming(wiphy)) + return; + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; @@ -1094,8 +1084,6 @@ static void wiphy_update_new_beacon(struct wiphy *wiphy, unsigned int i; struct ieee80211_supported_band *sband; - assert_cfg80211_lock(); - if (!wiphy->bands[reg_beacon->chan.band]) return; @@ -1114,11 +1102,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) struct ieee80211_supported_band *sband; struct reg_beacon *reg_beacon; - assert_cfg80211_lock(); - - if (list_empty(®_beacon_list)) - return; - list_for_each_entry(reg_beacon, ®_beacon_list, list) { if (!wiphy->bands[reg_beacon->chan.band]) continue; @@ -1128,18 +1111,6 @@ static void wiphy_update_beacon_reg(struct wiphy *wiphy) } } -static bool reg_is_world_roaming(struct wiphy *wiphy) -{ - if (is_world_regdom(cfg80211_regdomain->alpha2) || - (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) - return true; - if (last_request && - last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) - return true; - return false; -} - /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { @@ -1149,39 +1120,29 @@ static void reg_process_beacons(struct wiphy *wiphy) */ if (!last_request) return; - if (!reg_is_world_roaming(wiphy)) - return; wiphy_update_beacon_reg(wiphy); } -static bool is_ht40_not_allowed(struct ieee80211_channel *chan) +static bool is_ht40_allowed(struct ieee80211_channel *chan) { if (!chan) - return true; + return false; if (chan->flags & IEEE80211_CHAN_DISABLED) - return true; + return false; /* This would happen when regulatory rules disallow HT40 completely */ - if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40))) - return true; - return false; + if ((chan->flags & IEEE80211_CHAN_NO_HT40) == IEEE80211_CHAN_NO_HT40) + return false; + return true; } static void reg_process_ht_flags_channel(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx) + struct ieee80211_channel *channel) { - struct ieee80211_supported_band *sband; - struct ieee80211_channel *channel; + struct ieee80211_supported_band *sband = wiphy->bands[channel->band]; struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; unsigned int i; - assert_cfg80211_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - channel = &sband->channels[chan_idx]; - - if (is_ht40_not_allowed(channel)) { + if (!is_ht40_allowed(channel)) { channel->flags |= IEEE80211_CHAN_NO_HT40; return; } @@ -1192,6 +1153,7 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, */ for (i = 0; i < sband->n_channels; i++) { struct ieee80211_channel *c = &sband->channels[i]; + if (c->center_freq == (channel->center_freq - 20)) channel_before = c; if (c->center_freq == (channel->center_freq + 20)) @@ -1203,28 +1165,27 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, * if that ever changes we also need to change the below logic * to include that as well. */ - if (is_ht40_not_allowed(channel_before)) + if (!is_ht40_allowed(channel_before)) channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; - if (is_ht40_not_allowed(channel_after)) + if (!is_ht40_allowed(channel_after)) channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; } static void reg_process_ht_flags_band(struct wiphy *wiphy, - enum ieee80211_band band) + struct ieee80211_supported_band *sband) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - reg_process_ht_flags_channel(wiphy, band, i); + reg_process_ht_flags_channel(wiphy, &sband->channels[i]); } static void reg_process_ht_flags(struct wiphy *wiphy) @@ -1234,34 +1195,29 @@ static void reg_process_ht_flags(struct wiphy *wiphy) if (!wiphy) return; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - reg_process_ht_flags_band(wiphy, band); - } - + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; - - assert_reg_lock(); + struct regulatory_request *lr = get_last_request(); if (ignore_reg_update(wiphy, initiator)) return; - last_request->dfs_region = cfg80211_regdomain->dfs_region; + lr->dfs_region = get_cfg80211_regdom()->dfs_region; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (wiphy->bands[band]) - handle_band(wiphy, band, initiator); - } + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + handle_band(wiphy, initiator, wiphy->bands[band]); reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); + if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) @@ -1269,6 +1225,8 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; + assert_cfg80211_lock(); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); @@ -1280,53 +1238,40 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, last_request); + wiphy->reg_notifier(wiphy, get_last_request()); } } static void handle_channel_custom(struct wiphy *wiphy, - enum ieee80211_band band, - unsigned int chan_idx, + struct ieee80211_channel *chan, const struct ieee80211_regdomain *regd) { - int r; - u32 desired_bw_khz = MHZ_TO_KHZ(20); u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - - assert_reg_lock(); - - sband = wiphy->bands[band]; - BUG_ON(chan_idx >= sband->n_channels); - chan = &sband->channels[chan_idx]; - r = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), - desired_bw_khz, - ®_rule, - regd); + reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq), + regd); - if (r) { - REG_DBG_PRINT("Disabling freq %d MHz as custom " - "regd has no rule that fits a %d MHz " - "wide channel\n", - chan->center_freq, - KHZ_TO_MHZ(desired_bw_khz)); + if (IS_ERR(reg_rule)) { + REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", + chan->center_freq); chan->flags = IEEE80211_CHAN_DISABLED; return; } - chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + chan_reg_rule_print_dbg(chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40)) bw_flags = IEEE80211_CHAN_NO_HT40; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); @@ -1334,17 +1279,17 @@ static void handle_channel_custom(struct wiphy *wiphy, (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_band_custom(struct wiphy *wiphy, + struct ieee80211_supported_band *sband, const struct ieee80211_regdomain *regd) { unsigned int i; - struct ieee80211_supported_band *sband; - BUG_ON(!wiphy->bands[band]); - sband = wiphy->bands[band]; + if (!sband) + return; for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, band, i, regd); + handle_channel_custom(wiphy, &sband->channels[i], regd); } /* Used by drivers prior to wiphy registration */ @@ -1354,60 +1299,50 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; - mutex_lock(®_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; - handle_band_custom(wiphy, band, regd); + handle_band_custom(wiphy, wiphy->bands[band], regd); bands_set++; } - mutex_unlock(®_mutex); /* * no point in calling this if it won't have any effect - * on your device's supportd bands. + * on your device's supported bands. */ WARN_ON(!bands_set); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* - * Return value which can be used by ignore_request() to indicate - * it has been determined we should intersect two regulatory domains - */ -#define REG_INTERSECT 1 - /* This has the logic which determines when a new request * should be ignored. */ -static int ignore_request(struct wiphy *wiphy, +static enum reg_request_treatment +get_reg_request_treatment(struct wiphy *wiphy, struct regulatory_request *pending_request) { struct wiphy *last_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr = get_last_request(); /* All initial requests are respected */ - if (!last_request) - return 0; + if (!lr) + return REG_REQ_OK; switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_COUNTRY_IE: - - if (reg_request_cell_base(last_request)) { + if (reg_request_cell_base(lr)) { /* Trust a Cell base station over the AP's country IE */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } - last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different @@ -1416,23 +1351,23 @@ static int ignore_request(struct wiphy *wiphy, * to be correct. Reject second one for now. */ if (regdom_changes(pending_request->alpha2)) - return -EOPNOTSUPP; - return -EALREADY; + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } /* * Two consecutive Country IE hints on the same wiphy. * This should be picked up early by the driver/stack */ if (WARN_ON(regdom_changes(pending_request->alpha2))) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } return 0; case NL80211_REGDOM_SET_BY_DRIVER: - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) - return 0; - return -EALREADY; + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; } /* @@ -1440,59 +1375,59 @@ static int ignore_request(struct wiphy *wiphy, * back in or if you add a new device for which the previously * loaded card also agrees on the regulatory domain. */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && !regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return REG_INTERSECT; + return REG_REQ_INTERSECT; case NL80211_REGDOM_SET_BY_USER: if (reg_request_cell_base(pending_request)) return reg_ignore_cell_hint(pending_request); - if (reg_request_cell_base(last_request)) - return -EOPNOTSUPP; + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; - if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_INTERSECT; + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_INTERSECT; /* * If the user knows better the user should set the regdom * to their country before the IE is picked up */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && - last_request->intersect) - return -EOPNOTSUPP; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; /* * Process user requests only after previous user/driver/core * requests have been processed */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || - last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - if (regdom_changes(last_request->alpha2)) - return -EAGAIN; - } + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; if (!regdom_changes(pending_request->alpha2)) - return -EALREADY; + return REG_REQ_ALREADY_SET; - return 0; + return REG_REQ_OK; } - return -EINVAL; + return REG_REQ_IGNORE; } static void reg_set_request_processed(void) { bool need_more_processing = false; + struct regulatory_request *lr = get_last_request(); - last_request->processed = true; + lr->processed = true; spin_lock(®_requests_lock); if (!list_empty(®_requests_list)) need_more_processing = true; spin_unlock(®_requests_lock); - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) cancel_delayed_work(®_timeout); if (need_more_processing) @@ -1508,116 +1443,122 @@ static void reg_set_request_processed(void) * The Wireless subsystem can use this function to hint to the wireless core * what it believes should be the current regulatory domain. * - * Returns zero if all went fine, %-EALREADY if a regulatory domain had - * already been set or other standard error codes. + * Returns one of the different reg request treatment values. * - * Caller must hold &cfg80211_mutex and ®_mutex + * Caller must hold ®_mutex */ -static int __regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static enum reg_request_treatment +__regulatory_hint(struct wiphy *wiphy, + struct regulatory_request *pending_request) { + const struct ieee80211_regdomain *regd; bool intersect = false; - int r = 0; - - assert_cfg80211_lock(); + enum reg_request_treatment treatment; + struct regulatory_request *lr; - r = ignore_request(wiphy, pending_request); + treatment = get_reg_request_treatment(wiphy, pending_request); - if (r == REG_INTERSECT) { + switch (treatment) { + case REG_REQ_INTERSECT: if (pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return PTR_ERR(regd); } + rcu_assign_pointer(wiphy->regd, regd); } intersect = true; - } else if (r) { + break; + case REG_REQ_OK: + break; + default: /* * If the regulatory domain being requested by the * driver has already been set just copy it to the * wiphy */ - if (r == -EALREADY && - pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) { + if (treatment == REG_REQ_ALREADY_SET && + pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { kfree(pending_request); - return r; + return REG_REQ_IGNORE; } - r = -EALREADY; + treatment = REG_REQ_ALREADY_SET; + rcu_assign_pointer(wiphy->regd, regd); goto new_request; } kfree(pending_request); - return r; + return treatment; } new_request: - if (last_request != &core_request_world) - kfree(last_request); + lr = get_last_request(); + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); - last_request = pending_request; - last_request->intersect = intersect; + pending_request->intersect = intersect; + pending_request->processed = false; + rcu_assign_pointer(last_request, pending_request); + lr = pending_request; pending_request = NULL; - if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = last_request->alpha2[0]; - user_alpha2[1] = last_request->alpha2[1]; + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = lr->alpha2[0]; + user_alpha2[1] = lr->alpha2[1]; } - /* When r == REG_INTERSECT we do need to call CRDA */ - if (r < 0) { + /* When r == REG_REQ_INTERSECT we do need to call CRDA */ + if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { /* * Since CRDA will not be called in this case as we already * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) { - nl80211_send_reg_change_event(last_request); + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(lr); reg_set_request_processed(); } - return r; + return treatment; } - return call_crda(last_request->alpha2); + if (call_crda(lr->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; } /* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request, enum nl80211_reg_initiator reg_initiator) { - int r = 0; struct wiphy *wiphy = NULL; - BUG_ON(!reg_request->alpha2); + if (WARN_ON(!reg_request->alpha2)) + return; - if (wiphy_idx_valid(reg_request->wiphy_idx)) + if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && - !wiphy) { + if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - r = __regulatory_hint(wiphy, reg_request); - /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { - wiphy_update_regulatory(wiphy, reg_initiator); - return; + switch (__regulatory_hint(wiphy, reg_request)) { + case REG_REQ_ALREADY_SET: + /* This is required so that the orig_* parameters are saved */ + if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy_update_regulatory(wiphy, reg_initiator); + break; + default: + if (reg_initiator == NL80211_REGDOM_SET_BY_USER) + schedule_delayed_work(®_timeout, + msecs_to_jiffies(3142)); + break; } - - /* - * We only time out user hints, given that they should be the only - * source of bogus requests. - */ - if (r != -EALREADY && - reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); } /* @@ -1627,15 +1568,15 @@ static void reg_process_hint(struct regulatory_request *reg_request, */ static void reg_process_pending_hints(void) { - struct regulatory_request *reg_request; + struct regulatory_request *reg_request, *lr; mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); + lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ - if (last_request && !last_request->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting " - "for it to be processed...\n"); + if (lr && !lr->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); goto out; } @@ -1666,23 +1607,14 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - /* - * No need to hold the reg_mutex here as we just touch wiphys - * and do not read or access regulatory variables. - */ mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); - if (list_empty(®_pending_beacons)) { - spin_unlock_bh(®_pending_beacons_lock); - goto out; - } - list_for_each_entry_safe(pending_beacon, tmp, ®_pending_beacons, list) { - list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ @@ -1694,7 +1626,7 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); -out: + mutex_unlock(®_mutex); mutex_unlock(&cfg80211_mutex); } @@ -1706,10 +1638,8 @@ static void reg_todo(struct work_struct *work) static void queue_regulatory_request(struct regulatory_request *request) { - if (isalpha(request->alpha2[0])) - request->alpha2[0] = toupper(request->alpha2[0]); - if (isalpha(request->alpha2[1])) - request->alpha2[1] = toupper(request->alpha2[1]); + request->alpha2[0] = toupper(request->alpha2[0]); + request->alpha2[1] = toupper(request->alpha2[1]); spin_lock(®_requests_lock); list_add_tail(&request->list, ®_requests_list); @@ -1726,8 +1656,7 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1746,13 +1675,14 @@ int regulatory_hint_user(const char *alpha2, { struct regulatory_request *request; - BUG_ON(!alpha2); + if (WARN_ON(!alpha2)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; - request->wiphy_idx = WIPHY_IDX_STALE; + request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; @@ -1768,8 +1698,8 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { struct regulatory_request *request; - BUG_ON(!alpha2); - BUG_ON(!wiphy); + if (WARN_ON(!alpha2 || !wiphy)) + return -EINVAL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1777,9 +1707,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->wiphy_idx = get_wiphy_idx(wiphy); - /* Must have registered wiphy first */ - BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); - request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; @@ -1794,18 +1721,17 @@ EXPORT_SYMBOL(regulatory_hint); * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. */ -void regulatory_hint_11d(struct wiphy *wiphy, - enum ieee80211_band band, - const u8 *country_ie, - u8 country_ie_len) +void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, + const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request; + struct regulatory_request *request, *lr; mutex_lock(®_mutex); + lr = get_last_request(); - if (unlikely(!last_request)) + if (unlikely(!lr)) goto out; /* IE len must be evenly divisible by 2 */ @@ -1828,9 +1754,8 @@ void regulatory_hint_11d(struct wiphy *wiphy, * We leave conflict resolution to the workqueue, where can hold * cfg80211_mutex. */ - if (likely(last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy_idx_valid(last_request->wiphy_idx))) + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -1843,12 +1768,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; - mutex_unlock(®_mutex); - queue_regulatory_request(request); - - return; - out: mutex_unlock(®_mutex); } @@ -1863,8 +1783,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("Restoring regulatory settings " - "including user preference\n"); + REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1874,26 +1793,20 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. */ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("Restoring regulatory settings " - "while preserving user preference for: %c%c\n", - user_alpha2[0], - user_alpha2[1]); + REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", + user_alpha2[0], user_alpha2[1]); alpha2[0] = user_alpha2[0]; alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("Keeping preference on " - "module parameter ieee80211_regdom: %c%c\n", - ieee80211_regdom[0], - ieee80211_regdom[1]); + REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else @@ -1948,7 +1861,7 @@ static void restore_regulatory_settings(bool reset_user) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(true); + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); /* @@ -1958,49 +1871,35 @@ static void restore_regulatory_settings(bool reset_user) * settings. */ spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - if (reg_request->initiator != - NL80211_REGDOM_SET_BY_USER) - continue; - list_move_tail(®_request->list, &tmp_reg_req_list); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + if (reg_request->initiator != NL80211_REGDOM_SET_BY_USER) + continue; + list_move_tail(®_request->list, &tmp_reg_req_list); } spin_unlock(®_requests_lock); /* Clear beacon hints */ spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } /* First restore to the basic regulatory settings */ - cfg80211_regdomain = cfg80211_world_regdom; - world_alpha2[0] = cfg80211_regdomain->alpha2[0]; - world_alpha2[1] = cfg80211_regdomain->alpha2[1]; + world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; + world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) restore_custom_reg_settings(&rdev->wiphy); } - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); - regulatory_hint_core(world_alpha2); /* @@ -2011,20 +1910,8 @@ static void restore_regulatory_settings(bool reset_user) if (is_an_alpha2(alpha2)) regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); - if (list_empty(&tmp_reg_req_list)) - return; - - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - spin_lock(®_requests_lock); - list_for_each_entry_safe(reg_request, tmp, &tmp_reg_req_list, list) { - REG_DBG_PRINT("Adding request for country %c%c back " - "into the queue\n", - reg_request->alpha2[0], - reg_request->alpha2[1]); - list_move_tail(®_request->list, ®_requests_list); - } + list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); mutex_unlock(®_mutex); @@ -2037,8 +1924,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("All devices are disconnected, going to " - "restore regulatory settings\n"); + REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -2051,31 +1937,48 @@ static bool freq_is_chan_12_13_14(u16 freq) return false; } +static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan) +{ + struct reg_beacon *pending_beacon; + + list_for_each_entry(pending_beacon, ®_pending_beacons, list) + if (beacon_chan->center_freq == + pending_beacon->chan.center_freq) + return true; + return false; +} + int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, gfp_t gfp) { struct reg_beacon *reg_beacon; + bool processing; - if (likely((beacon_chan->beacon_found || - (beacon_chan->flags & IEEE80211_CHAN_RADAR) || + if (beacon_chan->beacon_found || + beacon_chan->flags & IEEE80211_CHAN_RADAR || (beacon_chan->band == IEEE80211_BAND_2GHZ && - !freq_is_chan_12_13_14(beacon_chan->center_freq))))) + !freq_is_chan_12_13_14(beacon_chan->center_freq))) + return 0; + + spin_lock_bh(®_pending_beacons_lock); + processing = pending_reg_beacon(beacon_chan); + spin_unlock_bh(®_pending_beacons_lock); + + if (processing) return 0; reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("Found new beacon on " - "frequency: %d MHz (Ch %d) on %s\n", + REG_DBG_PRINT("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), wiphy_name(wiphy)); memcpy(®_beacon->chan, beacon_chan, - sizeof(struct ieee80211_channel)); - + sizeof(struct ieee80211_channel)); /* * Since we can be called from BH or and non-BH context @@ -2155,21 +2058,19 @@ static void print_dfs_region(u8 dfs_region) pr_info(" DFS Master region JP"); break; default: - pr_info(" DFS Master region Uknown"); + pr_info(" DFS Master region Unknown"); break; } } static void print_regdomain(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr = get_last_request(); if (is_intersected_alpha2(rd->alpha2)) { - - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { struct cfg80211_registered_device *rdev; - rdev = cfg80211_rdev_by_wiphy_idx( - last_request->wiphy_idx); + rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx); if (rdev) { pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], @@ -2178,22 +2079,21 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) pr_info("Current regulatory domain intersected:\n"); } else pr_info("Current regulatory domain intersected:\n"); - } else if (is_world_regdom(rd->alpha2)) + } else if (is_world_regdom(rd->alpha2)) { pr_info("World regulatory domain updated:\n"); - else { + } else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else { - if (reg_request_cell_base(last_request)) - pr_info("Regulatory domain changed " - "to country: %c%c by Cell Station\n", + if (reg_request_cell_base(lr)) + pr_info("Regulatory domain changed to country: %c%c by Cell Station\n", rd->alpha2[0], rd->alpha2[1]); else - pr_info("Regulatory domain changed " - "to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } } + print_dfs_region(rd->dfs_region); print_rd_rules(rd); } @@ -2207,22 +2107,23 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { + const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; struct wiphy *request_wiphy; + struct regulatory_request *lr = get_last_request(); + /* Some basic sanity checks first */ + if (!reg_is_valid_request(rd->alpha2)) + return -EINVAL; + if (is_world_regdom(rd->alpha2)) { - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; update_world_regdomain(rd); return 0; } if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) - return -EINVAL; - - if (!last_request) + !is_unknown_alpha2(rd->alpha2)) return -EINVAL; /* @@ -2230,7 +2131,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * rd is non static (it means CRDA was present and was used last) * and the pending request came in from a country IE */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { /* * If someone else asked us to change the rd lets only bother * checking if the alpha2 changes if CRDA was already called @@ -2246,29 +2147,23 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * internal EEPROM data */ - if (WARN_ON(!reg_is_valid_request(rd->alpha2))) - return -EINVAL; - if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy && - (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || - last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { schedule_delayed_work(®_timeout, 0); return -ENODEV; } - if (!last_request->intersect) { - int r; - - if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false); - cfg80211_regdomain = rd; + if (!lr->intersect) { + if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { + reset_regdomains(false, rd); return 0; } @@ -2284,20 +2179,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (request_wiphy->regd) return -EALREADY; - r = reg_copy_regd(&request_wiphy->regd, rd); - if (r) - return r; + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); - reset_regdomains(false); - cfg80211_regdomain = rd; + rcu_assign_pointer(request_wiphy->regd, regd); + reset_regdomains(false, rd); return 0; } /* Intersection requires a bit more work */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - - intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); if (!intersected_rd) return -EINVAL; @@ -2306,15 +2200,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ - if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) - request_wiphy->regd = rd; - else + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + const struct ieee80211_regdomain *tmp; + + tmp = get_wiphy_regdom(request_wiphy); + rcu_assign_pointer(request_wiphy->regd, rd); + rcu_free_regdom(tmp); + } else { kfree(rd); + } rd = NULL; - reset_regdomains(false); - cfg80211_regdomain = intersected_rd; + reset_regdomains(false, intersected_rd); return 0; } @@ -2326,15 +2224,15 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* * Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_mutex + * kmalloc'd the rd structure. */ int set_regdom(const struct ieee80211_regdomain *rd) { + struct regulatory_request *lr; int r; - assert_cfg80211_lock(); - mutex_lock(®_mutex); + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); @@ -2343,23 +2241,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - mutex_unlock(®_mutex); - return r; + goto out; } /* This would make this whole thing pointless */ - if (!last_request->intersect) - BUG_ON(rd != cfg80211_regdomain); + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { + r = -EINVAL; + goto out; + } /* update all wiphys now with the new established regulatory domain */ - update_all_wiphy_regulatory(last_request->initiator); + update_all_wiphy_regulatory(lr->initiator); - print_regdomain(cfg80211_regdomain); + print_regdomain(get_cfg80211_regdom()); - nl80211_send_reg_change_event(last_request); + nl80211_send_reg_change_event(lr); reg_set_request_processed(); + out: mutex_unlock(®_mutex); return r; @@ -2367,20 +2267,26 @@ int set_regdom(const struct ieee80211_regdomain *rd) int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) { - if (last_request && !last_request->processed) { - if (add_uevent_var(env, "COUNTRY=%c%c", - last_request->alpha2[0], - last_request->alpha2[1])) - return -ENOMEM; + struct regulatory_request *lr; + u8 alpha2[2]; + bool add = false; + + rcu_read_lock(); + lr = get_last_request(); + if (lr && !lr->processed) { + memcpy(alpha2, lr->alpha2, 2); + add = true; } + rcu_read_unlock(); + if (add) + return add_uevent_var(env, "COUNTRY=%c%c", + alpha2[0], alpha2[1]); return 0; } void wiphy_regulatory_register(struct wiphy *wiphy) { - assert_cfg80211_lock(); - mutex_lock(®_mutex); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2395,32 +2301,32 @@ void wiphy_regulatory_register(struct wiphy *wiphy) void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; - - assert_cfg80211_lock(); + struct regulatory_request *lr; mutex_lock(®_mutex); + lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint--; - kfree(wiphy->regd); + rcu_free_regdom(get_wiphy_regdom(wiphy)); + rcu_assign_pointer(wiphy->regd, NULL); - if (last_request) - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (lr) + request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) goto out; - last_request->wiphy_idx = WIPHY_IDX_STALE; - last_request->country_ie_env = ENVIRON_ANY; + lr->wiphy_idx = WIPHY_IDX_INVALID; + lr->country_ie_env = ENVIRON_ANY; out: mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) { - REG_DBG_PRINT("Timeout while waiting for CRDA to reply, " - "restoring regulatory settings\n"); + REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); restore_regulatory_settings(true); } @@ -2439,13 +2345,13 @@ int __init regulatory_init(void) reg_regdb_size_check(); - cfg80211_regdomain = cfg80211_world_regdom; + rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; user_alpha2[1] = '7'; /* We always try to get an update for the static regdomain */ - err = regulatory_hint_core(cfg80211_regdomain->alpha2); + err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) return err; @@ -2457,10 +2363,6 @@ int __init regulatory_init(void) * errors as non-fatal. */ pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); -#ifdef CONFIG_CFG80211_REG_DEBUG - /* We want to find out exactly why when debugging */ - WARN_ON(err); -#endif } /* @@ -2474,7 +2376,7 @@ int __init regulatory_init(void) return 0; } -void /* __init_or_exit */ regulatory_exit(void) +void regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2482,43 +2384,27 @@ void /* __init_or_exit */ regulatory_exit(void) cancel_work_sync(®_work); cancel_delayed_work_sync(®_timeout); - mutex_lock(&cfg80211_mutex); + /* Lock to suppress warnings */ mutex_lock(®_mutex); - - reset_regdomains(true); + reset_regdomains(true, NULL); + mutex_unlock(®_mutex); dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); - spin_lock_bh(®_pending_beacons_lock); - if (!list_empty(®_pending_beacons)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_pending_beacons, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_unlock_bh(®_pending_beacons_lock); - if (!list_empty(®_beacon_list)) { - list_for_each_entry_safe(reg_beacon, btmp, - ®_beacon_list, list) { - list_del(®_beacon->list); - kfree(reg_beacon); - } + list_for_each_entry_safe(reg_beacon, btmp, ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); } - spin_lock(®_requests_lock); - if (!list_empty(®_requests_list)) { - list_for_each_entry_safe(reg_request, tmp, - ®_requests_list, list) { - list_del(®_request->list); - kfree(reg_request); - } + list_for_each_entry_safe(reg_request, tmp, ®_requests_list, list) { + list_del(®_request->list); + kfree(reg_request); } - spin_unlock(®_requests_lock); - - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4c0a32ffd53..af2d5f8a5d8 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -16,10 +16,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -extern const struct ieee80211_regdomain *cfg80211_regdomain; +extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); -bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); int regulatory_hint_user(const char *alpha2, @@ -55,8 +54,8 @@ bool reg_last_request_cell_base(void); * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, - struct ieee80211_channel *beacon_chan, - gfp_t gfp); + struct ieee80211_channel *beacon_chan, + gfp_t gfp); /** * regulatory_hint_11d - hints a country IE as a regulatory domain diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 45f1618c8e2..674aadca007 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -19,55 +19,142 @@ #include "wext-compat.h" #include "rdev-ops.h" +/** + * DOC: BSS tree/list structure + * + * At the top level, the BSS list is kept in both a list in each + * registered device (@bss_list) as well as an RB-tree for faster + * lookup. In the RB-tree, entries can be looked up using their + * channel, MESHID, MESHCONF (for MBSSes) or channel, BSSID, SSID + * for other BSSes. + * + * Due to the possibility of hidden SSIDs, there's a second level + * structure, the "hidden_list" and "hidden_beacon_bss" pointer. + * The hidden_list connects all BSSes belonging to a single AP + * that has a hidden SSID, and connects beacon and probe response + * entries. For a probe response entry for a hidden SSID, the + * hidden_beacon_bss pointer points to the BSS struct holding the + * beacon's information. + * + * Reference counting is done for all these references except for + * the hidden_list, so that a beacon BSS struct that is otherwise + * not referenced has one reference for being on the bss_list and + * one for each probe response entry that points to it using the + * hidden_beacon_bss pointer. When a BSS struct that has such a + * pointer is get/put, the refcount update is also propagated to + * the referenced struct, this ensure that it cannot get removed + * while somebody is using the probe response version. + * + * Note that the hidden_beacon_bss pointer never changes, due to + * the reference counting. Therefore, no locking is needed for + * it. + * + * Also note that the hidden_beacon_bss pointer is only relevant + * if the driver uses something other than the IEs, e.g. private + * data stored stored in the BSS struct, since the beacon IEs are + * also linked into the probe response struct. + */ + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) -static void bss_release(struct kref *ref) +static void bss_free(struct cfg80211_internal_bss *bss) { struct cfg80211_bss_ies *ies; - struct cfg80211_internal_bss *bss; - - bss = container_of(ref, struct cfg80211_internal_bss, ref); if (WARN_ON(atomic_read(&bss->hold))) return; - if (bss->pub.free_priv) - bss->pub.free_priv(&bss->pub); - ies = (void *)rcu_access_pointer(bss->pub.beacon_ies); - if (ies) + if (ies && !bss->pub.hidden_beacon_bss) kfree_rcu(ies, rcu_head); ies = (void *)rcu_access_pointer(bss->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); + /* + * This happens when the module is removed, it doesn't + * really matter any more save for completeness + */ + if (!list_empty(&bss->hidden_list)) + list_del(&bss->hidden_list); + kfree(bss); } -/* must hold dev->bss_lock! */ -static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + bss->refcount++; + if (bss->pub.hidden_beacon_bss) { + bss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + bss->refcount++; + } +} + +static inline void bss_ref_put(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&dev->bss_lock); + + if (bss->pub.hidden_beacon_bss) { + struct cfg80211_internal_bss *hbss; + hbss = container_of(bss->pub.hidden_beacon_bss, + struct cfg80211_internal_bss, + pub); + hbss->refcount--; + if (hbss->refcount == 0) + bss_free(hbss); + } + bss->refcount--; + if (bss->refcount == 0) + bss_free(bss); +} + +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { + lockdep_assert_held(&dev->bss_lock); + + if (!list_empty(&bss->hidden_list)) { + /* + * don't remove the beacon entry if it has + * probe responses associated with it + */ + if (!bss->pub.hidden_beacon_bss) + return false; + /* + * if it's a probe response entry break its + * link to the other entries in the group + */ + list_del_init(&bss->hidden_list); + } + list_del_init(&bss->list); rb_erase(&bss->rbn, &dev->bss_tree); - kref_put(&bss->ref, bss_release); + bss_ref_put(dev, bss); + return true; } -/* must hold dev->bss_lock! */ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; + lockdep_assert_held(&dev->bss_lock); + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - __cfg80211_unlink_bss(dev, bss); - expired = true; + if (__cfg80211_unlink_bss(dev, bss)) + expired = true; } if (expired) @@ -234,15 +321,16 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -/* must hold dev->bss_lock! */ void cfg80211_bss_age(struct cfg80211_registered_device *dev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); + spin_lock_bh(&dev->bss_lock); list_for_each_entry(bss, &dev->bss_list, list) bss->ts -= age_jiffies; + spin_unlock_bh(&dev->bss_lock); } void cfg80211_bss_expire(struct cfg80211_registered_device *dev) @@ -277,40 +365,24 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, if (!pos) return NULL; - if (end - pos < sizeof(*ie)) - return NULL; - ie = (struct ieee80211_vendor_ie *)pos; + + /* make sure we can access ie->len */ + BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); + + if (ie->len < sizeof(*ie)) + goto cont; + ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; if (ie_oui == oui && ie->oui_type == oui_type) return pos; - +cont: pos += 2 + ie->len; } return NULL; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); -static int cmp_ies(u8 num, const u8 *ies1, int len1, const u8 *ies2, int len2) -{ - const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); - const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); - - /* equal if both missing */ - if (!ie1 && !ie2) - return 0; - /* sort missing IE before (left of) present IE */ - if (!ie1) - return -1; - if (!ie2) - return 1; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); -} - static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, const u8 *ssid, size_t ssid_len) { @@ -334,109 +406,30 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } -static bool is_mesh_bss(struct cfg80211_bss *a) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - - return true; -} - -static bool is_mesh(struct cfg80211_bss *a, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - const struct cfg80211_bss_ies *ies; - const u8 *ie; - - if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) - return false; - - ies = rcu_access_pointer(a->ies); - if (!ies) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_ID, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != meshidlen) - return false; - if (memcmp(ie + 2, meshid, meshidlen)) - return false; - - ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, ies->data, ies->len); - if (!ie) - return false; - if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) - return false; - - /* - * Ignore mesh capability (last two bytes of the IE) when - * comparing since that may differ between stations taking - * part in the same mesh. - */ - return memcmp(ie + 2, meshcfg, - sizeof(struct ieee80211_meshconf_ie) - 2) == 0; -} +/** + * enum bss_compare_mode - BSS compare mode + * @BSS_CMP_REGULAR: regular compare mode (for insertion and normal find) + * @BSS_CMP_HIDE_ZLEN: find hidden SSID with zero-length mode + * @BSS_CMP_HIDE_NUL: find hidden SSID with NUL-ed out mode + */ +enum bss_compare_mode { + BSS_CMP_REGULAR, + BSS_CMP_HIDE_ZLEN, + BSS_CMP_HIDE_NUL, +}; -static int cmp_bss_core(struct cfg80211_bss *a, struct cfg80211_bss *b) +static int cmp_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b, + enum bss_compare_mode mode) { const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; + const u8 *ie1 = NULL; + const u8 *ie2 = NULL; + int i, r; if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (is_mesh_bss(a) && is_mesh_bss(b)) { - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - - r = cmp_ies(WLAN_EID_MESH_ID, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); - if (r) - return r; - return cmp_ies(WLAN_EID_MESH_CONFIG, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); - } - - /* - * we can't use compare_ether_addr here since we need a < > operator. - * The binary return value of compare_ether_addr isn't enough - */ - return memcmp(a->bssid, b->bssid, sizeof(a->bssid)); -} - -static int cmp_bss(struct cfg80211_bss *a, - struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - int r; - - r = cmp_bss_core(a, b); - if (r) - return r; - a_ies = rcu_access_pointer(a->ies); if (!a_ies) return -1; @@ -444,42 +437,51 @@ static int cmp_bss(struct cfg80211_bss *a, if (!b_ies) return 1; - return cmp_ies(WLAN_EID_SSID, - a_ies->data, a_ies->len, - b_ies->data, b_ies->len); -} - -static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) -{ - const struct cfg80211_bss_ies *a_ies, *b_ies; - const u8 *ie1; - const u8 *ie2; - int i; - int r; + if (WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + ie1 = cfg80211_find_ie(WLAN_EID_MESH_ID, + a_ies->data, a_ies->len); + if (WLAN_CAPABILITY_IS_STA_BSS(b->capability)) + ie2 = cfg80211_find_ie(WLAN_EID_MESH_ID, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + int mesh_id_cmp; + + if (ie1[1] == ie2[1]) + mesh_id_cmp = memcmp(ie1 + 2, ie2 + 2, ie1[1]); + else + mesh_id_cmp = ie2[1] - ie1[1]; + + ie1 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a_ies->data, a_ies->len); + ie2 = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + b_ies->data, b_ies->len); + if (ie1 && ie2) { + if (mesh_id_cmp) + return mesh_id_cmp; + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + } + } - r = cmp_bss_core(a, b); + /* + * we can't use compare_ether_addr here since we need a < > operator. + * The binary return value of compare_ether_addr isn't enough + */ + r = memcmp(a->bssid, b->bssid, sizeof(a->bssid)); if (r) return r; - a_ies = rcu_access_pointer(a->ies); - if (!a_ies) - return -1; - b_ies = rcu_access_pointer(b->ies); - if (!b_ies) - return 1; - ie1 = cfg80211_find_ie(WLAN_EID_SSID, a_ies->data, a_ies->len); ie2 = cfg80211_find_ie(WLAN_EID_SSID, b_ies->data, b_ies->len); + if (!ie1 && !ie2) + return 0; + /* - * Key comparator must use same algorithm in any rb-tree - * search function (order is important), otherwise ordering - * of items in the tree is broken and search gives incorrect - * results. This code uses same order as cmp_ies() does. - * - * Note that due to the differring behaviour with hidden SSIDs - * this function only works when "b" is the tree element and - * "a" is the key we're looking for. + * Note that with "hide_ssid", the function returns a match if + * the already-present BSS ("b") is a hidden SSID beacon for + * the new BSS ("a"). */ /* sort missing IE before (left of) present IE */ @@ -488,24 +490,36 @@ static int cmp_hidden_bss(struct cfg80211_bss *a, struct cfg80211_bss *b) if (!ie2) return 1; - /* zero-size SSID is used as an indication of the hidden bss */ - if (!ie2[1]) + switch (mode) { + case BSS_CMP_HIDE_ZLEN: + /* + * In ZLEN mode we assume the BSS entry we're + * looking for has a zero-length SSID. So if + * the one we're looking at right now has that, + * return 0. Otherwise, return the difference + * in length, but since we're looking for the + * 0-length it's really equivalent to returning + * the length of the one we're looking at. + * + * No content comparison is needed as we assume + * the content length is zero. + */ + return ie2[1]; + case BSS_CMP_REGULAR: + default: + /* sort by length first, then by contents */ + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); + case BSS_CMP_HIDE_NUL: + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + /* this is equivalent to memcmp(zeroes, ie2 + 2, len) */ + for (i = 0; i < ie2[1]; i++) + if (ie2[i + 2]) + return -1; return 0; - - /* sort by length first, then by contents */ - if (ie1[1] != ie2[1]) - return ie2[1] - ie1[1]; - - /* - * zeroed SSID ie is another indication of a hidden bss; - * if it isn't zeroed just return the regular sort value - * to find the next candidate - */ - for (i = 0; i < ie2[1]; i++) - if (ie2[i + 2]) - return memcmp(ie1 + 2, ie2 + 2, ie1[1]); - - return 0; + } } struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, @@ -534,7 +548,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - kref_get(&res->ref); + bss_ref_get(dev, res); break; } } @@ -547,34 +561,6 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *meshid, size_t meshidlen, - const u8 *meshcfg) -{ - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); - struct cfg80211_internal_bss *bss, *res = NULL; - - spin_lock_bh(&dev->bss_lock); - - list_for_each_entry(bss, &dev->bss_list, list) { - if (channel && bss->pub.channel != channel) - continue; - if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) { - res = bss; - kref_get(&res->ref); - break; - } - } - - spin_unlock_bh(&dev->bss_lock); - if (!res) - return NULL; - return &res->pub; -} -EXPORT_SYMBOL(cfg80211_get_mesh); - - static void rb_insert_bss(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *bss) { @@ -587,7 +573,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, parent = *p; tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn); - cmp = cmp_bss(&bss->pub, &tbss->pub); + cmp = cmp_bss(&bss->pub, &tbss->pub, BSS_CMP_REGULAR); if (WARN_ON(!cmp)) { /* will sort of leak this BSS */ @@ -606,7 +592,8 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * rb_find_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) + struct cfg80211_internal_bss *res, + enum bss_compare_mode mode) { struct rb_node *n = dev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; @@ -614,7 +601,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, while (n) { bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_bss(&res->pub, &bss->pub); + r = cmp_bss(&res->pub, &bss->pub, mode); if (r == 0) return bss; @@ -627,46 +614,67 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static struct cfg80211_internal_bss * -rb_find_hidden_bss(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res) +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *new) { - struct rb_node *n = dev->bss_tree.rb_node; + const struct cfg80211_bss_ies *ies; struct cfg80211_internal_bss *bss; - int r; + const u8 *ie; + int i, ssidlen; + u8 fold = 0; - while (n) { - bss = rb_entry(n, struct cfg80211_internal_bss, rbn); - r = cmp_hidden_bss(&res->pub, &bss->pub); + ies = rcu_access_pointer(new->pub.beacon_ies); + if (WARN_ON(!ies)) + return false; - if (r == 0) - return bss; - else if (r < 0) - n = n->rb_left; - else - n = n->rb_right; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) { + /* nothing to do */ + return true; } - return NULL; -} + ssidlen = ie[1]; + for (i = 0; i < ssidlen; i++) + fold |= ie[2 + i]; -static void -copy_hidden_ies(struct cfg80211_internal_bss *res, - struct cfg80211_internal_bss *hidden) -{ - const struct cfg80211_bss_ies *ies; + if (fold) { + /* not a hidden SSID */ + return true; + } - if (rcu_access_pointer(res->pub.beacon_ies)) - return; + /* This is the bad part ... */ - ies = rcu_access_pointer(hidden->pub.beacon_ies); - if (WARN_ON(!ies)) - return; + list_for_each_entry(bss, &dev->bss_list, list) { + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) + continue; + if (bss->pub.channel != new->pub.channel) + continue; + if (rcu_access_pointer(bss->pub.beacon_ies)) + continue; + ies = rcu_access_pointer(bss->pub.ies); + if (!ies) + continue; + ie = cfg80211_find_ie(WLAN_EID_SSID, ies->data, ies->len); + if (!ie) + continue; + if (ssidlen && ie[1] != ssidlen) + continue; + /* that would be odd ... */ + if (bss->pub.beacon_ies) + continue; + if (WARN_ON_ONCE(bss->pub.hidden_beacon_bss)) + continue; + if (WARN_ON_ONCE(!list_empty(&bss->hidden_list))) + list_del(&bss->hidden_list); + /* combine them */ + list_add(&bss->hidden_list, &new->hidden_list); + bss->pub.hidden_beacon_bss = &new->pub; + new->refcount += bss->refcount; + rcu_assign_pointer(bss->pub.beacon_ies, + new->pub.beacon_ies); + } - ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC); - if (unlikely(!ies)) - return; - rcu_assign_pointer(res->pub.beacon_ies, ies); + return true; } static struct cfg80211_internal_bss * @@ -687,11 +695,10 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, return NULL; } - found = rb_find_bss(dev, tmp); + found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.tsf = tmp->pub.tsf; found->pub.signal = tmp->pub.signal; found->pub.capability = tmp->pub.capability; found->ts = tmp->ts; @@ -711,19 +718,45 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } else if (rcu_access_pointer(tmp->pub.beacon_ies)) { - const struct cfg80211_bss_ies *old, *ies; + const struct cfg80211_bss_ies *old; + struct cfg80211_internal_bss *bss; + + if (found->pub.hidden_beacon_bss && + !list_empty(&found->hidden_list)) { + /* + * The found BSS struct is one of the probe + * response members of a group, but we're + * receiving a beacon (beacon_ies in the tmp + * bss is used). This can only mean that the + * AP changed its beacon from not having an + * SSID to showing it, which is confusing so + * drop this information. + */ + goto drop; + } old = rcu_access_pointer(found->pub.beacon_ies); - ies = rcu_access_pointer(found->pub.ies); rcu_assign_pointer(found->pub.beacon_ies, tmp->pub.beacon_ies); /* Override IEs if they were from a beacon before */ - if (old == ies) + if (old == rcu_access_pointer(found->pub.ies)) rcu_assign_pointer(found->pub.ies, tmp->pub.beacon_ies); + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &found->hidden_list, + hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old); + + rcu_assign_pointer(bss->pub.beacon_ies, + tmp->pub.beacon_ies); + } + if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); @@ -733,19 +766,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *hidden; struct cfg80211_bss_ies *ies; - /* First check if the beacon is a probe response from - * a hidden bss. If so, copy beacon ies (with nullified - * ssid) into the probe response bss entry (with real ssid). - * It is required basically for PSM implementation - * (probe responses do not contain tim ie) */ - - /* TODO: The code is not trying to update existing probe - * response bss entries when beacon ies are - * getting changed. */ - hidden = rb_find_hidden_bss(dev, tmp); - if (hidden) - copy_hidden_ies(tmp, hidden); - /* * create a copy -- the "res" variable that is passed in * is allocated on the stack since it's not needed in the @@ -760,21 +780,51 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, ies = (void *)rcu_dereference(tmp->pub.proberesp_ies); if (ies) kfree_rcu(ies, rcu_head); - spin_unlock_bh(&dev->bss_lock); - return NULL; + goto drop; } memcpy(new, tmp, sizeof(*new)); - kref_init(&new->ref); + new->refcount = 1; + INIT_LIST_HEAD(&new->hidden_list); + + if (rcu_access_pointer(tmp->pub.proberesp_ies)) { + hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + if (!hidden) + hidden = rb_find_bss(dev, tmp, + BSS_CMP_HIDE_NUL); + if (hidden) { + new->pub.hidden_beacon_bss = &hidden->pub; + list_add(&new->hidden_list, + &hidden->hidden_list); + hidden->refcount++; + rcu_assign_pointer(new->pub.beacon_ies, + hidden->pub.beacon_ies); + } + } else { + /* + * Ok so we found a beacon, and don't have an entry. If + * it's a beacon with hidden SSID, we might be in for an + * expensive search for any probe responses that should + * be grouped with this beacon for updates ... + */ + if (!cfg80211_combine_bsses(dev, new)) { + kfree(new); + goto drop; + } + } + list_add_tail(&new->list, &dev->bss_list); rb_insert_bss(dev, new); found = new; } dev->bss_generation++; + bss_ref_get(dev, found); spin_unlock_bh(&dev->bss_lock); - kref_get(&found->ref); return found; + drop: + spin_unlock_bh(&dev->bss_lock); + return NULL; } static struct ieee80211_channel * @@ -833,7 +883,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, memcpy(tmp.pub.bssid, bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = tsf; tmp.pub.beacon_interval = beacon_interval; tmp.pub.capability = capability; /* @@ -841,16 +890,14 @@ cfg80211_inform_bss(struct wiphy *wiphy, * Response frame, we need to pick one of the options and only use it * with the driver that does not provide the full Beacon/Probe Response * frame. Use Beacon frame pointer to avoid indicating that this should - * override the iies pointer should we have received an earlier + * override the IEs pointer should we have received an earlier * indication of Probe Response data. - * - * The initial buffer for the IEs is allocated with the BSS entry and - * is located after the private area. */ ies = kmalloc(sizeof(*ies) + ielen, gfp); if (!ies) return NULL; ies->len = ielen; + ies->tsf = tsf; memcpy(ies->data, ie, ielen); rcu_assign_pointer(tmp.pub.beacon_ies, ies); @@ -907,6 +954,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, if (!ies) return NULL; ies->len = ielen; + ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); memcpy(ies->data, mgmt->u.probe_resp.variable, ielen); if (ieee80211_is_probe_resp(mgmt->frame_control)) @@ -918,7 +966,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); tmp.pub.channel = channel; tmp.pub.signal = signal; - tmp.pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); @@ -935,27 +982,35 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss_frame); -void cfg80211_ref_bss(struct cfg80211_bss *pub) +void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_get(&bss->ref); + + spin_lock_bh(&dev->bss_lock); + bss_ref_get(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); -void cfg80211_put_bss(struct cfg80211_bss *pub) +void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) return; bss = container_of(pub, struct cfg80211_internal_bss, pub); - kref_put(&bss->ref, bss_release); + + spin_lock_bh(&dev->bss_lock); + bss_ref_put(dev, bss); + spin_unlock_bh(&dev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); @@ -971,8 +1026,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); if (!list_empty(&bss->list)) { - __cfg80211_unlink_bss(dev, bss); - dev->bss_generation++; + if (__cfg80211_unlink_bss(dev, bss)) + dev->bss_generation++; } spin_unlock_bh(&dev->bss_lock); } @@ -1155,16 +1210,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, @@ -1241,15 +1286,10 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, rcu_read_lock(); ies = rcu_dereference(bss->pub.ies); - if (ies) { - rem = ies->len; - ie = ies->data; - } else { - rem = 0; - ie = NULL; - } + rem = ies->len; + ie = ies->data; - while (ies && rem >= 2) { + while (rem >= 2) { /* invalid data */ if (ie[1] > rem - 2) break; @@ -1362,7 +1402,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, if (buf) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf)); + sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f2431e41a37..f432bd3755b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -192,7 +192,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, ¶ms->crypto, + params->mfp != NL80211_MFP_NO, + ¶ms->crypto, params->flags, ¶ms->ht_capa, ¶ms->ht_capa_mask); if (err) @@ -300,7 +301,7 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) bss = cfg80211_get_conn_bss(wdev); if (bss) { - cfg80211_put_bss(bss); + cfg80211_put_bss(&rdev->wiphy, bss); } else { /* not found */ if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) @@ -463,7 +464,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; } @@ -479,7 +480,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -519,10 +520,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - country_ie + 2, the start of the country ie data, and * - and country_ie[1] which is the IE length */ - regulatory_hint_11d(wdev->wiphy, - bss->channel->band, - country_ie + 2, - country_ie[1]); + regulatory_hint_11d(wdev->wiphy, bss->channel->band, + country_ie + 2, country_ie[1]); kfree(country_ie); } @@ -587,7 +586,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, } cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; cfg80211_hold_bss(bss_from_pub(bss)); @@ -622,7 +621,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, return; out: - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } void cfg80211_roamed(struct net_device *dev, @@ -664,7 +663,7 @@ void cfg80211_roamed_bss(struct net_device *dev, ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) { - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); return; } @@ -705,7 +704,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&wdev->current_bss->pub); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; @@ -876,7 +875,7 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (bss) { wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(bss); + cfg80211_put_bss(wdev->wiphy, bss); } else { /* otherwise we'll need to scan for the AP first */ err = cfg80211_conn_scan(wdev); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 1f6f01e2dc4..238ee49b386 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -106,9 +106,7 @@ static int wiphy_resume(struct device *dev) int ret = 0; /* Age scan results with time spent in suspend */ - spin_lock_bh(&rdev->bss_lock); cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - spin_unlock_bh(&rdev->bss_lock); if (rdev->ops->resume) { rtnl_lock(); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426..b7a531380e1 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1767,6 +1767,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WIPHY_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2033,6 +2051,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2049,6 +2082,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), @@ -2315,6 +2378,41 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2..37a56ee1e1e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,46 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan && + (chan->flags & IEEE80211_CHAN_RADAR)); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1308,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1341,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. If they diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index 8bafa31fa9f..e98a01c1034 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -143,7 +143,8 @@ static const struct file_operations wireless_seq_fops = { int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_create("wireless", S_IRUGO, net->proc_net, + &wireless_seq_fops)) return -ENOMEM; return 0; @@ -151,5 +152,5 @@ int __net_init wext_proc_init(struct net *net) void __net_exit wext_proc_exit(struct net *net) { - proc_net_remove(net, "wireless"); + remove_proc_entry("wireless", net->proc_net); } diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 4ce2d93162c..6fb9d00a75d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -35,6 +35,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, .sadb_alg_ivlen = 8, @@ -51,6 +53,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, .sadb_alg_ivlen = 8, @@ -67,6 +71,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, .sadb_alg_ivlen = 8, @@ -83,6 +89,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, .sadb_alg_ivlen = 8, @@ -99,6 +107,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, .sadb_alg_ivlen = 8, @@ -115,6 +125,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, .sadb_alg_ivlen = 8, @@ -131,6 +143,8 @@ static struct xfrm_algo_desc aead_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, .sadb_alg_ivlen = 8, @@ -151,6 +165,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, @@ -169,6 +185,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, @@ -187,6 +205,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, @@ -205,6 +225,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, @@ -222,6 +244,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, .sadb_alg_ivlen = 0, @@ -239,6 +263,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, .sadb_alg_ivlen = 0, @@ -257,6 +283,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, @@ -274,6 +302,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, .sadb_alg_ivlen = 0, @@ -295,6 +325,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, @@ -313,6 +345,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, @@ -331,6 +365,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, @@ -349,6 +385,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, @@ -367,6 +405,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, @@ -385,6 +425,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, @@ -403,6 +445,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_SERPENTCBC, .sadb_alg_ivlen = 8, @@ -421,6 +465,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, .sadb_alg_ivlen = 8, @@ -439,6 +485,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, .sadb_alg_ivlen = 8, @@ -456,6 +504,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, + .pfkey_supported = 1, + .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, @@ -473,6 +523,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { @@ -482,6 +533,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 90, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { @@ -491,6 +543,7 @@ static struct xfrm_algo_desc calg_list[] = { .threshold = 50, } }, + .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; @@ -700,8 +753,7 @@ void xfrm_probe_algs(void) } for (i = 0; i < ealg_entries(); i++) { - status = crypto_has_blkcipher(ealg_list[i].name, 0, - CRYPTO_ALG_ASYNC); + status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } @@ -715,27 +767,27 @@ void xfrm_probe_algs(void) } EXPORT_SYMBOL_GPL(xfrm_probe_algs); -int xfrm_count_auth_supported(void) +int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) + if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_auth_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); -int xfrm_count_enc_supported(void) +int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) + if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } -EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); +EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 95a338c89f9..bcfda8921b5 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 07c585756d2..5b47180986f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -35,6 +35,10 @@ #include "xfrm_hash.h" +#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) +#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) +#define XFRM_MAX_QUEUE_LEN 100 + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -51,7 +55,7 @@ static struct kmem_cache *xfrm_dst_cache __read_mostly; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); - +static void xfrm_policy_queue_process(unsigned long arg); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -287,8 +291,11 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + skb_queue_head_init(&policy->polq.hold_queue); setup_timer(&policy->timer, xfrm_policy_timer, (unsigned long)policy); + setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, + (unsigned long)policy); policy->flo.ops = &xfrm_policy_fc_ops; } return policy; @@ -309,6 +316,16 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); +static void xfrm_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ @@ -319,6 +336,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); + del_timer(&policy->polq.hold_timer); + xfrm_queue_purge(&policy->polq.hold_queue); + if (del_timer(&policy->timer)) xfrm_pol_put(policy); @@ -562,6 +582,46 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s return 0; } +static void xfrm_policy_requeue(struct xfrm_policy *old, + struct xfrm_policy *new) +{ + struct xfrm_policy_queue *pq = &old->polq; + struct sk_buff_head list; + + __skb_queue_head_init(&list); + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice_init(&pq->hold_queue, &list); + del_timer(&pq->hold_timer); + spin_unlock_bh(&pq->hold_queue.lock); + + if (skb_queue_empty(&list)) + return; + + pq = &new->polq; + + spin_lock_bh(&pq->hold_queue.lock); + skb_queue_splice(&list, &pq->hold_queue); + pq->timeout = XFRM_QUEUE_TMO_MIN; + mod_timer(&pq->hold_timer, jiffies); + spin_unlock_bh(&pq->hold_queue.lock); +} + +static bool xfrm_policy_mark_match(struct xfrm_policy *policy, + struct xfrm_policy *pol) +{ + u32 mark = policy->mark.v & policy->mark.m; + + if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) + return true; + + if ((mark & pol->mark.m) == pol->mark.v && + policy->priority == pol->priority) + return true; + + return false; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); @@ -569,7 +629,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; struct hlist_node *entry, *newpos; - u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -578,7 +637,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(policy, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -603,8 +662,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); rt_genid_bump(net); - if (delpol) + if (delpol) { + xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); + } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); @@ -1115,11 +1176,15 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } - if (old_pol) + if (old_pol) { + if (pol) + xfrm_policy_requeue(old_pol, pol); + /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + } write_unlock_bh(&xfrm_policy_lock); if (old_pol) { @@ -1310,6 +1375,8 @@ static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *f * It means we need to try again resolving. */ if (xdst->num_xfrms > 0) return NULL; + } else if (dst->flags & DST_XFRM_QUEUE) { + return NULL; } else { /* Real bundle */ if (stale_bundle(dst)) @@ -1673,6 +1740,171 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return xdst; } +static void xfrm_policy_queue_process(unsigned long arg) +{ + int err = 0; + struct sk_buff *skb; + struct sock *sk; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy_queue *pq = &pol->polq; + struct flowi fl; + struct sk_buff_head list; + + spin_lock(&pq->hold_queue.lock); + skb = skb_peek(&pq->hold_queue); + dst = skb_dst(skb); + sk = skb->sk; + xfrm_decode_session(skb, &fl, dst->ops->family); + spin_unlock(&pq->hold_queue.lock); + + dst_hold(dst->path); + dst = xfrm_lookup(xp_net(pol), dst->path, &fl, + sk, 0); + if (IS_ERR(dst)) + goto purge_queue; + + if (dst->flags & DST_XFRM_QUEUE) { + dst_release(dst); + + if (pq->timeout >= XFRM_QUEUE_TMO_MAX) + goto purge_queue; + + pq->timeout = pq->timeout << 1; + mod_timer(&pq->hold_timer, jiffies + pq->timeout); + return; + } + + dst_release(dst); + + __skb_queue_head_init(&list); + + spin_lock(&pq->hold_queue.lock); + pq->timeout = 0; + skb_queue_splice_init(&pq->hold_queue, &list); + spin_unlock(&pq->hold_queue.lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + + xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + dst_hold(skb_dst(skb)->path); + dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, + &fl, skb->sk, 0); + if (IS_ERR(dst)) { + dev_put(skb->dev); + kfree_skb(skb); + continue; + } + + nf_reset(skb); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + dev = skb->dev; + err = dst_output(skb); + dev_put(dev); + } + + return; + +purge_queue: + pq->timeout = 0; + xfrm_queue_purge(&pq->hold_queue); +} + +static int xdst_queue_output(struct sk_buff *skb) +{ + unsigned long sched_next; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + + if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { + kfree_skb(skb); + return -EAGAIN; + } + + skb_dst_force(skb); + dev_hold(skb->dev); + + spin_lock_bh(&pq->hold_queue.lock); + + if (!pq->timeout) + pq->timeout = XFRM_QUEUE_TMO_MIN; + + sched_next = jiffies + pq->timeout; + + if (del_timer(&pq->hold_timer)) { + if (time_before(pq->hold_timer.expires, sched_next)) + sched_next = pq->hold_timer.expires; + } + + __skb_queue_tail(&pq->hold_queue, skb); + mod_timer(&pq->hold_timer, sched_next); + + spin_unlock_bh(&pq->hold_queue.lock); + + return 0; +} + +static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, + struct dst_entry *dst, + const struct flowi *fl, + int num_xfrms, + u16 family) +{ + int err; + struct net_device *dev; + struct dst_entry *dst1; + struct xfrm_dst *xdst; + + xdst = xfrm_alloc_dst(net, family); + if (IS_ERR(xdst)) + return xdst; + + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || + (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + return xdst; + + dst1 = &xdst->u.dst; + dst_hold(dst); + xdst->route = dst; + + dst_copy_metrics(dst1, dst); + + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; + dst1->flags |= DST_HOST | DST_XFRM_QUEUE; + dst1->lastuse = jiffies; + + dst1->input = dst_discard; + dst1->output = xdst_queue_output; + + dst_hold(dst); + dst1->child = dst; + dst1->path = dst; + + xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); + + err = -ENODEV; + dev = dst->dev; + if (!dev) + goto free_dst; + + err = xfrm_fill_dst(xdst, dev, fl); + if (err) + goto free_dst; + +out: + return xdst; + +free_dst: + dst_release(dst1); + xdst = ERR_PTR(err); + goto out; +} + static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) @@ -1751,7 +1983,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_alloc_dst(net, family); + xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2359,6 +2591,9 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) (dst->dev && !netif_running(dst->dev))) return 0; + if (dst->flags & DST_XFRM_QUEUE) + return 1; + last = NULL; do { @@ -2786,10 +3021,10 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && - xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) == 0 && - xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) == 0 && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { return true; @@ -2847,10 +3082,10 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: - if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, - m->old_family) == 0 && - xfrm_addr_cmp(&t->saddr, &m->old_saddr, - m->old_family) == 0) { + if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, + m->old_family) && + xfrm_addr_equal(&t->saddr, &m->old_saddr, + m->old_family)) { match = 1; } break; @@ -2916,10 +3151,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return -EINVAL; for (i = 0; i < num_migrate; i++) { - if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, - m[i].old_family) == 0) && - (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, - m[i].old_family) == 0)) + if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) && + xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family)) return -EINVAL; if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8ed58..c721b0d9ab8 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; @@ -73,13 +74,13 @@ static const struct file_operations xfrm_statistics_seq_fops = { int __net_init xfrm_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, - &xfrm_statistics_seq_fops)) + if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + &xfrm_statistics_seq_fops)) return -ENOMEM; return 0; } void xfrm_proc_fini(struct net *net) { - proc_net_remove(net, "xfrm_stat"); + remove_proc_entry("xfrm_stat", net->proc_net); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692092e..ae01bdbcb29 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -158,8 +158,8 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DEFINE_RWLOCK(xfrm_state_afinfo_lock); -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); +static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; static DEFINE_SPINLOCK(xfrm_state_gc_lock); @@ -168,58 +168,45 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); -static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) -{ - struct xfrm_state_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_state_afinfo_lock); - return afinfo; -} - -static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) -{ - write_unlock_bh(&xfrm_state_afinfo_lock); -} - +static DEFINE_SPINLOCK(xfrm_type_lock); int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (likely(typemap[type->proto] == NULL)) typemap[type->proto] = type; else err = -EEXIST; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; + spin_lock_bh(&xfrm_type_lock); if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else typemap[type->proto] = NULL; - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_type_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -256,6 +243,7 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { struct xfrm_state_afinfo *afinfo; @@ -265,12 +253,13 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -EEXIST; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (modemap[mode->encap]) goto out; @@ -283,7 +272,8 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) err = 0; out: - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -297,19 +287,21 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) if (unlikely(mode->encap >= XFRM_MODE_MAX)) return -EINVAL; - afinfo = xfrm_state_lock_afinfo(family); + afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; err = -ENOENT; modemap = afinfo->mode_map; + spin_lock_bh(&xfrm_mode_lock); if (likely(modemap[mode->encap] == mode)) { modemap[mode->encap] = NULL; module_put(mode->afinfo->owner); err = 0; } - xfrm_state_unlock_afinfo(afinfo); + spin_unlock_bh(&xfrm_mode_lock); + xfrm_state_put_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -699,7 +691,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -723,8 +715,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; if ((mark & x->mark.m) != x->mark.v) @@ -989,8 +981,8 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) + xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) && + xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family)) x->genid++; } } @@ -1024,8 +1016,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, x->id.spi != 0 || x->id.proto != proto || (mark & x->mark.m) != x->mark.v || - xfrm_addr_cmp(&x->id.daddr, daddr, family) || - xfrm_addr_cmp(&x->props.saddr, saddr, family)) + !xfrm_addr_equal(&x->id.daddr, daddr, family) || + !xfrm_addr_equal(&x->props.saddr, saddr, family)) continue; xfrm_state_hold(x); @@ -1108,7 +1100,7 @@ int xfrm_state_add(struct xfrm_state *x) if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || - xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { + !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; x1 = NULL; } @@ -1234,10 +1226,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) continue; if (m->reqid && x->props.reqid != m->reqid) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1249,10 +1241,10 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) if (x->props.mode != m->mode || x->id.proto != m->proto) continue; - if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, - m->old_family) || - xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, - m->old_family)) + if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, + m->old_family) || + !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, + m->old_family)) continue; xfrm_state_hold(x); return x; @@ -1277,7 +1269,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); /* add state */ - if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) { /* a care is needed when the destination address of the state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); @@ -1370,9 +1362,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; @@ -1648,27 +1637,26 @@ static void xfrm_replay_timer_handler(unsigned long data) } static LIST_HEAD(xfrm_km_list); -static DEFINE_RWLOCK(xfrm_km_lock); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify_policy) km->notify_policy(xp, dir, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) if (km->notify) km->notify(x, c); - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); } EXPORT_SYMBOL(km_policy_notify); @@ -1698,13 +1686,13 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) int err = -EINVAL, acqret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_query); @@ -1714,14 +1702,14 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) int err = -EINVAL; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->new_mapping) err = km->new_mapping(x, ipaddr, sport); if (!err) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_new_mapping); @@ -1750,15 +1738,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_migrate); @@ -1770,15 +1758,15 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address int ret; struct xfrm_mgr *km; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->report) { ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(km_report); @@ -1802,14 +1790,14 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen goto out; err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; } - read_unlock(&xfrm_km_lock); + rcu_read_unlock(); if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); @@ -1823,20 +1811,23 @@ out: } EXPORT_SYMBOL(xfrm_user_policy); +static DEFINE_SPINLOCK(xfrm_km_lock); + int xfrm_register_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_add_tail_rcu(&km->list, &xfrm_km_list); + spin_unlock_bh(&xfrm_km_lock); return 0; } EXPORT_SYMBOL(xfrm_register_km); int xfrm_unregister_km(struct xfrm_mgr *km) { - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); + spin_lock_bh(&xfrm_km_lock); + list_del_rcu(&km->list); + spin_unlock_bh(&xfrm_km_lock); + synchronize_rcu(); return 0; } EXPORT_SYMBOL(xfrm_unregister_km); @@ -1848,12 +1839,12 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else - xfrm_state_afinfo[afinfo->family] = afinfo; - write_unlock_bh(&xfrm_state_afinfo_lock); + rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_state_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_state_register_afinfo); @@ -1865,14 +1856,15 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_state_afinfo_lock); + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else - xfrm_state_afinfo[afinfo->family] = NULL; + RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); } - write_unlock_bh(&xfrm_state_afinfo_lock); + spin_unlock_bh(&xfrm_state_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); @@ -1882,17 +1874,16 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_state_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); return afinfo; } static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) - __releases(xfrm_state_afinfo_lock) { - read_unlock(&xfrm_state_afinfo_lock); + rcu_read_unlock(); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index eb872b2e366..fbd9e6cd0fd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1112,7 +1112,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); - if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { + if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } |