diff options
Diffstat (limited to 'net')
495 files changed, 18347 insertions, 9546 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ec9909935fb..175273f38cb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -307,9 +307,11 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); + vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5704ed9c3a2..9d010a09ab9 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -38,9 +38,9 @@ struct vlan_info { static inline unsigned int vlan_proto_idx(__be16 proto) { switch (proto) { - case __constant_htons(ETH_P_8021Q): + case htons(ETH_P_8021Q): return VLAN_PROTO_8021Q; - case __constant_htons(ETH_P_8021AD): + case htons(ETH_P_8021AD): return VLAN_PROTO_8021AD; default: BUG(); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 6ee48aac776..3c32bd257b7 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -22,11 +22,11 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; - if (skb->pkt_type == PACKET_OTHERHOST) { + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) { /* Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ - if (ether_addr_equal(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) + if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } @@ -106,6 +106,12 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); +__be16 vlan_dev_vlan_proto(const struct net_device *dev) +{ + return vlan_dev_priv(dev)->vlan_proto; +} +EXPORT_SYMBOL(vlan_dev_vlan_proto); + static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { if (skb_cow(skb, skb_headroom(skb)) < 0) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de51c48c439..6f142f03716 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -538,6 +538,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; + if (saddr == NULL) + saddr = dev->dev_addr; + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); } @@ -556,7 +559,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0, i; + int subclass = 0; netif_carrier_off(dev); @@ -575,6 +578,9 @@ static int vlan_dev_init(struct net_device *dev) dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; + if (dev->features & NETIF_F_VLAN_FEATURES) + netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; @@ -589,7 +595,8 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { + if (vlan_hw_offload_capable(real_dev->features, + vlan_dev_priv(dev)->vlan_proto)) { dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { @@ -606,17 +613,10 @@ static int vlan_dev_init(struct net_device *dev) vlan_dev_set_lockdep_class(dev, subclass); - vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct vlan_pcpu_stats *vlan_stat; - vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - u64_stats_init(&vlan_stat->syncp); - } - - return 0; } @@ -682,13 +682,13 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; rxmulticast = p->rx_multicast; txpackets = p->tx_packets; txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rxpackets; stats->rx_bytes += rxbytes; @@ -711,20 +711,19 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, - gfp_t gfp) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), gfp); + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev, gfp); + err = __netpoll_setup(netpoll, real_dev); if (err) { kfree(netpoll); goto out; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index c7e634af851..8ac8a5cc214 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -56,8 +56,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VLAN_PROTOCOL]) { switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) { - case __constant_htons(ETH_P_8021Q): - case __constant_htons(ETH_P_8021AD): + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): break; default: return -EPROTONOSUPPORT; diff --git a/net/9p/client.c b/net/9p/client.c index a5e4d2dcb03..0004cbaac4a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -204,7 +204,7 @@ free_and_return: return ret; } -struct p9_fcall *p9_fcall_alloc(int alloc_msize) +static struct p9_fcall *p9_fcall_alloc(int alloc_msize) { struct p9_fcall *fc; fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); @@ -415,9 +415,17 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) * req: request received * */ -void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + /* + * This barrier is needed to make sure any change made to req before + * the other thread wakes up will indeed be seen by the waiting side. + */ + smp_wmb(); + req->status = status; + wake_up(req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } @@ -655,16 +663,13 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - /* * if we haven't received a response for oldreq, * remove it from the list */ - if (oldreq->status == REQ_STATUS_FLSH) { - spin_lock(&c->lock); - list_del(&oldreq->req_list); - spin_unlock(&c->lock); - } + if (oldreq->status == REQ_STATUS_SENT) + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, oldreq); p9_free_req(c, req); return 0; @@ -751,6 +756,12 @@ again: err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); + /* + * Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback + */ + smp_rmb(); + if ((err == -ERESTARTSYS) && (c->status == Connected) && (type == P9_TFLUSH)) { sigpending = 1; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index b7bd7f2961b..80d08f6664c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -66,20 +66,6 @@ struct p9_fd_opts { int privport; }; -/** - * struct p9_trans_fd - transport state - * @rd: reference to file to read from - * @wr: reference of file to write to - * @conn: connection state reference - * - */ - -struct p9_trans_fd { - struct file *rd; - struct file *wr; - struct p9_conn *conn; -}; - /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options @@ -159,6 +145,20 @@ struct p9_conn { unsigned long wsched; }; +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + +struct p9_trans_fd { + struct file *rd; + struct file *wr; + struct p9_conn conn; +}; + static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); @@ -212,15 +212,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err) m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } spin_unlock_irqrestore(&m->client->lock, flags); @@ -228,7 +222,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); - p9_client_cb(m->client, req); + if (!req->t_err) + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } @@ -302,6 +298,7 @@ static void p9_read_work(struct work_struct *work) { int n, err; struct p9_conn *m; + int status = REQ_STATUS_ERROR; m = container_of(work, struct p9_conn, rq); @@ -348,8 +345,7 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req || (m->req->status != REQ_STATUS_SENT && - m->req->status != REQ_STATUS_FLSH)) { + if (!m->req || (m->req->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -375,10 +371,10 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) - m->req->status = REQ_STATUS_RCVD; + status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); - p9_client_cb(m->client, m->req); + p9_client_cb(m->client, m->req, status); m->rbuf = NULL; m->rpos = 0; m->rsize = 0; @@ -573,21 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } /** - * p9_conn_create - allocate and initialize the per-session mux data + * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ -static struct p9_conn *p9_conn_create(struct p9_client *client) +static void p9_conn_create(struct p9_client *client) { int n; - struct p9_conn *m; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); - m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&m->mux_list); m->client = client; @@ -609,8 +603,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } - - return m; } /** @@ -669,7 +661,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { int n; struct p9_trans_fd *ts = client->trans; - struct p9_conn *m = ts->conn; + struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, req->tc, req->tc->id); @@ -704,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } else if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - + } spin_unlock(&client->lock); return ret; } +static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + + /* we haven't received a response for oldreq, + * remove it from the list. + */ + spin_lock(&client->lock); + list_del(&req->req_list); + spin_unlock(&client->lock); + + return 0; +} + /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount @@ -780,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -806,9 +810,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; - int ret; - p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; @@ -829,20 +832,12 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) p->rd->f_flags |= O_NONBLOCK; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - ret = PTR_ERR(p->conn); - p->conn = NULL; - kfree(p); - sockfd_put(csocket); - sockfd_put(csocket); - return ret; - } + p9_conn_create(client); return 0; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources + * p9_mux_destroy - cancels all pending requests of mux * @m: mux to destroy * */ @@ -859,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m) p9_conn_cancel(m, -ECONNRESET); m->client = NULL; - kfree(m); } /** @@ -881,7 +875,7 @@ static void p9_fd_close(struct p9_client *client) client->status = Disconnected; - p9_conn_destroy(ts->conn); + p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); @@ -1033,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) return err; p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - fput(p->rd); - fput(p->wr); - return err; - } + p9_conn_create(client); return 0; } @@ -1053,6 +1040,7 @@ static struct p9_trans_module p9_tcp_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1064,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1075,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 8f68df5d297..14ad43b5cf8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -193,6 +193,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); + if (token == Opt_err) + continue; r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -305,8 +307,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, } req->rc = c->rc; - req->status = REQ_STATUS_RCVD; - p9_client_cb(client, req); + p9_client_cb(client, req, REQ_STATUS_RCVD); return; @@ -511,6 +512,11 @@ dont_need_post_recv: goto send_error; } + /* Mark request as `sent' *before* we actually send it, + * because doing if after could erase the REQ_STATUS_RCVD + * status in case of a very fast reply. + */ + req->status = REQ_STATUS_SENT; err = ib_post_send(rdma->qp, &wr, &bad_wr); if (err) goto send_error; @@ -520,6 +526,7 @@ dont_need_post_recv: /* Handle errors that happened during or while preparing the send: */ send_error: + req->status = REQ_STATUS_ERROR; kfree(c); p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); @@ -582,12 +589,24 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) return rdma; } -/* its not clear to me we can do anything after send has been posted */ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) { + /* Nothing to do here. + * We will take care of it (if we have to) in rdma_cancelled() + */ return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance @@ -721,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = { .close = rdma_close, .request = rdma_request, .cancel = rdma_cancel, + .cancelled = rdma_cancelled, }; /** diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cd1e1ede73a..6940d8fe897 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -164,8 +164,7 @@ static void req_done(struct virtqueue *vq) p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - req->status = REQ_STATUS_RCVD; - p9_client_cb(chan->client, req); + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } @@ -340,7 +339,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, int count = nr_pages; while (nr_pages) { s = rest_of_page(data); - pages[index++] = kmap_to_page(data); + if (is_vmalloc_addr(data)) + pages[index++] = vmalloc_to_page(data); + else + pages[index++] = kmap_to_page(data); data += s; nr_pages--; } diff --git a/net/Kconfig b/net/Kconfig index e411046a62e..d92afe4204d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -89,8 +89,12 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. +config NET_PTP_CLASSIFY + def_bool n + config NETWORK_PHY_TIMESTAMPING bool "Timestamping in PHY devices" + select NET_PTP_CLASSIFY help This allows timestamping of network packets by PHYs with hardware timestamping capabilities. This option adds some @@ -239,7 +243,7 @@ config XPS default y config CGROUP_NET_PRIO - tristate "Network priority cgroup" + bool "Network priority cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use in assigning processes to network priorities on diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index d27b86dfb0e..d1c55d8dd0a 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -926,7 +926,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos) struct aarp_entry *entry; rescan: - while(ct < AARP_HASH_SIZE) { + while (ct < AARP_HASH_SIZE) { for (entry = table[ct]; entry; entry = entry->next) { if (!pos || ++off == *pos) { iter->table = table; @@ -995,7 +995,7 @@ static const char *dt2str(unsigned long ticks) { static char buf[32]; - sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100 ) / HZ); + sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100) / HZ); return buf; } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 02806c6b2ff..786ee2f83d5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -293,7 +293,7 @@ static int atif_probe_device(struct atalk_iface *atif) /* Perform AARP probing for a proxy address */ static int atif_proxy_probe_device(struct atalk_iface *atif, - struct atalk_addr* proxy_addr) + struct atalk_addr *proxy_addr) { int netrange = ntohs(atif->nets.nr_lastnet) - ntohs(atif->nets.nr_firstnet) + 1; @@ -581,7 +581,7 @@ out: } /* Delete a route. Find it and discard it */ -static int atrtr_delete(struct atalk_addr * addr) +static int atrtr_delete(struct atalk_addr *addr) { struct atalk_route **r = &atalk_routes; int retval = 0; @@ -936,11 +936,11 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, int i, copy; /* checksum stuff in header space */ - if ( (copy = start - offset) > 0) { + if ((copy = start - offset) > 0) { if (copy > len) copy = len; sum = atalk_sum_partial(skb->data + offset, copy, sum); - if ( (len -= copy) == 0) + if ((len -= copy) == 0) return sum; offset += copy; @@ -1151,7 +1151,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; at->src_net = addr->sat_addr.s_net = ap->s_net; - at->src_node = addr->sat_addr.s_node= ap->s_node; + at->src_node = addr->sat_addr.s_node = ap->s_node; } else { err = -EADDRNOTAVAIL; if (!atalk_find_interface(addr->sat_addr.s_net, @@ -1790,53 +1790,53 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; switch (cmd) { - /* Protocol layer */ - case TIOCOUTQ: { - long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + /* Protocol layer */ + case TIOCOUTQ: { + long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - rc = put_user(amount, (int __user *)argp); - break; - } - case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - long amount = 0; + if (amount < 0) + amount = 0; + rc = put_user(amount, (int __user *)argp); + break; + } + case TIOCINQ: { + /* + * These two are safe on a single CPU system as only + * user tasks fiddle here + */ + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + long amount = 0; - if (skb) - amount = skb->len - sizeof(struct ddpehdr); - rc = put_user(amount, (int __user *)argp); - break; - } - case SIOCGSTAMP: - rc = sock_get_timestamp(sk, argp); - break; - case SIOCGSTAMPNS: - rc = sock_get_timestampns(sk, argp); - break; - /* Routing */ - case SIOCADDRT: - case SIOCDELRT: - rc = -EPERM; - if (capable(CAP_NET_ADMIN)) - rc = atrtr_ioctl(cmd, argp); - break; - /* Interface */ - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFBRDADDR: - case SIOCATALKDIFADDR: - case SIOCDIFADDR: - case SIOCSARP: /* proxy AARP */ - case SIOCDARP: /* proxy AARP */ - rtnl_lock(); - rc = atif_ioctl(cmd, argp); - rtnl_unlock(); - break; + if (skb) + amount = skb->len - sizeof(struct ddpehdr); + rc = put_user(amount, (int __user *)argp); + break; + } + case SIOCGSTAMP: + rc = sock_get_timestamp(sk, argp); + break; + case SIOCGSTAMPNS: + rc = sock_get_timestampns(sk, argp); + break; + /* Routing */ + case SIOCADDRT: + case SIOCDELRT: + rc = -EPERM; + if (capable(CAP_NET_ADMIN)) + rc = atrtr_ioctl(cmd, argp); + break; + /* Interface */ + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFBRDADDR: + case SIOCATALKDIFADDR: + case SIOCDIFADDR: + case SIOCSARP: /* proxy AARP */ + case SIOCDARP: /* proxy AARP */ + rtnl_lock(); + rc = atif_ioctl(cmd, argp); + rtnl_unlock(); + break; } return rc; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8215f7cb170..ba291ce4bdf 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -68,7 +68,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) sk = sk_atm(atmarpd); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 0; } diff --git a/net/atm/lec.c b/net/atm/lec.c index 5a2f602d07e..4c5b8ba0f84 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -152,7 +152,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) atm_force_charge(priv->lecd, skb2->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); + sk->sk_data_ready(sk); } } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ @@ -447,7 +447,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) atm_force_charge(priv->lecd, skb2->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); + sk->sk_data_ready(sk); } } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ @@ -530,13 +530,13 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, atm_force_charge(priv->lecd, skb->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); if (data != NULL) { pr_debug("about to send %d bytes of data\n", data->len); atm_force_charge(priv->lecd, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); } return 0; @@ -616,7 +616,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("%s: To daemon\n", dev->name); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); } else { /* Data frame, queue to protocol handlers */ struct lec_arp_table *entry; unsigned char *src, *dst; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index b71ff6b234f..e8e0e7a8a23 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -706,7 +706,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) dprintk("(%s) control packet arrived\n", dev->name); /* Pass control packets to daemon */ skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return; } @@ -992,7 +992,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) sk = sk_atm(mpc->mpoad_vcc); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 0; } @@ -1273,7 +1273,7 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry) sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); dprintk("exiting\n"); } @@ -1492,7 +1492,7 @@ static void __exit atm_mpoa_cleanup(void) mpc_proc_clean(); - del_timer(&mpc_timer); + del_timer_sync(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); deregister_atm_ioctl(&atm_ioctl_ops); diff --git a/net/atm/raw.c b/net/atm/raw.c index b4f7b9ff3c7..2e17e97a7a8 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -25,7 +25,7 @@ static void atm_push_raw(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); } } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 4176887e72e..523bce72f69 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -51,7 +51,7 @@ static void sigd_put_skb(struct sk_buff *skb) #endif atm_force_charge(sigd, skb->truesize); skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb); - sk_atm(sigd)->sk_data_ready(sk_atm(sigd), skb->len); + sk_atm(sigd)->sk_data_ready(sk_atm(sigd)); } static void modify_qos(struct atm_vcc *vcc, struct atmsvc_msg *msg) diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 96f4cab3a2f..7ed8ab72481 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -422,7 +422,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, if (sk) { if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); sock_put(sk); } else { free: diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index fa780b76630..11660a3aab5 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -50,6 +50,15 @@ config BATMAN_ADV_NC If you think that your network does not need this feature you can safely disable it and save some space. +config BATMAN_ADV_MCAST + bool "Multicast optimisation" + depends on BATMAN_ADV + default n + help + This option enables the multicast optimisation which aims to + reduce the air overhead while improving the reliability of + multicast messages. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 42df18f877e..eb7d8c0388e 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -36,3 +36,4 @@ batman-adv-y += send.o batman-adv-y += soft-interface.o batman-adv-y += sysfs.o batman-adv-y += translation-table.o +batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 512159bf607..b3bd4ec3fd9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -241,19 +241,19 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) size = bat_priv->num_ifaces * sizeof(uint8_t); orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC); if (!orig_node->bat_iv.bcast_own_sum) - goto free_bcast_own; + goto free_orig_node; hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) - goto free_bcast_own; + goto free_orig_node; return orig_node; -free_bcast_own: - kfree(orig_node->bat_iv.bcast_own); free_orig_node: + /* free twice, as batadv_orig_node_new sets refcount to 2 */ + batadv_orig_node_free_ref(orig_node); batadv_orig_node_free_ref(orig_node); return NULL; @@ -266,7 +266,7 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, struct batadv_orig_node *orig_neigh) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_neigh_node *neigh_node; + struct batadv_neigh_node *neigh_node, *tmp_neigh_node; neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node); if (!neigh_node) @@ -281,14 +281,24 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, neigh_node->orig_node = orig_neigh; neigh_node->if_incoming = hard_iface; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM for orig_node %pM on interface %s\n", - neigh_addr, orig_node->orig, hard_iface->net_dev->name); - spin_lock_bh(&orig_node->neigh_list_lock); - hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface, + neigh_addr); + if (!tmp_neigh_node) { + hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + } else { + kfree(neigh_node); + batadv_hardif_free_ref(hard_iface); + neigh_node = tmp_neigh_node; + } spin_unlock_bh(&orig_node->neigh_list_lock); + if (!tmp_neigh_node) + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Creating new neighbor %pM for orig_node %pM on interface %s\n", + neigh_addr, orig_node->orig, + hard_iface->net_dev->name); + out: return neigh_node; } @@ -337,10 +347,10 @@ static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - memcpy(batadv_ogm_packet->orig, - hard_iface->net_dev->dev_addr, ETH_ALEN); - memcpy(batadv_ogm_packet->prev_sender, - hard_iface->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(batadv_ogm_packet->orig, + hard_iface->net_dev->dev_addr); + ether_addr_copy(batadv_ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr); } static void @@ -820,7 +830,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); batadv_ogm_packet->ttl--; - memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); + ether_addr_copy(batadv_ogm_packet->prev_sender, ethhdr->h_source); /* apply hop penalty */ batadv_ogm_packet->tq = batadv_hop_penalty(batadv_ogm_packet->tq, diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 05f0712be5e..6f0d9ec3795 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -191,7 +191,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, if (!hash) return NULL; - memcpy(search_entry.orig, addr, ETH_ALEN); + ether_addr_copy(search_entry.orig, addr); search_entry.vid = vid; index = batadv_choose_backbone_gw(&search_entry, hash->size); @@ -305,7 +305,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, /* normal claim frame * set Ethernet SRC to the clients mac */ - memcpy(ethhdr->h_source, mac, ETH_ALEN); + ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, BATADV_PRINT_VID(vid)); @@ -314,7 +314,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, /* unclaim frame * set HW SRC to the clients mac */ - memcpy(hw_src, mac, ETH_ALEN); + ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, BATADV_PRINT_VID(vid)); @@ -323,7 +323,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, /* announcement frame * set HW SRC to the special mac containg the crc */ - memcpy(hw_src, mac, ETH_ALEN); + ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", ethhdr->h_source, BATADV_PRINT_VID(vid)); @@ -333,8 +333,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, * set HW SRC and header destination to the receiving backbone * gws mac */ - memcpy(hw_src, mac, ETH_ALEN); - memcpy(ethhdr->h_dest, mac, ETH_ALEN); + ether_addr_copy(hw_src, mac); + ether_addr_copy(ethhdr->h_dest, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, @@ -395,7 +395,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, entry->bat_priv = bat_priv; atomic_set(&entry->request_sent, 0); atomic_set(&entry->wait_periods, 0); - memcpy(entry->orig, orig, ETH_ALEN); + ether_addr_copy(entry->orig, orig); /* one for the hash, one for returning */ atomic_set(&entry->refcount, 2); @@ -563,7 +563,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim search_claim; int hash_added; - memcpy(search_claim.addr, mac, ETH_ALEN); + ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); @@ -573,7 +573,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, if (!claim) return; - memcpy(claim->addr, mac, ETH_ALEN); + ether_addr_copy(claim->addr, mac); claim->vid = vid; claim->lasttime = jiffies; claim->backbone_gw = backbone_gw; @@ -624,7 +624,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, { struct batadv_bla_claim search_claim, *claim; - memcpy(search_claim.addr, mac, ETH_ALEN); + ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); if (!claim) @@ -882,7 +882,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, proto = ethhdr->h_proto; headlen = ETH_HLEN; if (vid & BATADV_VLAN_HAS_TAG) { - vhdr = (struct vlan_ethhdr *)ethhdr; + vhdr = vlan_eth_hdr(skb); proto = vhdr->h_vlan_encapsulated_proto; headlen += VLAN_HLEN; } @@ -1103,8 +1103,8 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, oldif->net_dev->dev_addr)) continue; - memcpy(backbone_gw->orig, - primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(backbone_gw->orig, + primary_if->net_dev->dev_addr); /* send an announce frame so others will ask for our * claims and update their tables. */ @@ -1310,7 +1310,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; - memcpy(entry->orig, bcast_packet->orig, ETH_ALEN); + ether_addr_copy(entry->orig, bcast_packet->orig); bat_priv->bla.bcast_duplist_curr = curr; out: @@ -1458,7 +1458,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) goto handled; - memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN); + ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); @@ -1547,9 +1547,6 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; - /* in VLAN case, the mac header might not be set. */ - skb_reset_mac_header(skb); - if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; @@ -1560,7 +1557,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (is_multicast_ether_addr(ethhdr->h_dest)) goto handled; - memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN); + ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index edee5041189..b25fd64d727 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -277,7 +277,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, /* if this entry is already known, just update it */ if (dat_entry) { if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr)) - memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; batadv_dbg(BATADV_DBG_DAT, bat_priv, "Entry updated: %pI4 %pM (vid: %d)\n", @@ -292,7 +292,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, dat_entry->ip = ip; dat_entry->vid = vid; - memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; atomic_set(&dat_entry->refcount, 2); @@ -1027,6 +1027,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; + /* the rest of the TX path assumes that the mac_header offset pointing + * to the inner Ethernet header has been set, therefore reset it now. + */ + skb_reset_mac_header(skb_new); + if (vid & BATADV_VLAN_HAS_TAG) skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), vid & VLAN_VID_MASK); diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index ac9be9b67a2..d76e1d06c5b 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -25,6 +25,9 @@ #include <linux/if_arp.h> +/** + * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space + */ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) void batadv_dat_status_update(struct net_device *net_dev); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 88df9b1d552..bcc4bea632f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -449,8 +449,8 @@ bool batadv_frag_send_packet(struct sk_buff *skb, frag_header.reserved = 0; frag_header.no = 0; frag_header.total_size = htons(skb->len); - memcpy(frag_header.orig, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(frag_header.dest, orig_node->orig, ETH_ALEN); + ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); + ether_addr_copy(frag_header.dest, orig_node->orig); /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 55cf2260d29..c835e137423 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -389,8 +389,6 @@ out: batadv_neigh_ifinfo_free_ref(router_gw_tq); if (router_orig_tq) batadv_neigh_ifinfo_free_ref(router_orig_tq); - - return; } /** @@ -680,7 +678,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) return BATADV_DHCP_NO; - ethhdr = (struct ethhdr *)skb->data; + ethhdr = eth_hdr(skb); proto = ethhdr->h_proto; *header_len += ETH_HLEN; @@ -689,7 +687,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) return BATADV_DHCP_NO; - vhdr = (struct vlan_ethhdr *)skb->data; + vhdr = vlan_eth_hdr(skb); proto = vhdr->h_vlan_encapsulated_proto; *header_len += VLAN_HLEN; } @@ -728,7 +726,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, return BATADV_DHCP_NO; /* skb->data might have been reallocated by pskb_may_pull() */ - ethhdr = (struct ethhdr *)skb->data; + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); @@ -765,7 +763,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, if (*p != ETH_ALEN) return BATADV_DHCP_NO; - memcpy(chaddr, skb->data + chaddr_offset, ETH_ALEN); + ether_addr_copy(chaddr, skb->data + chaddr_offset); } return ret; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 3d417d3641c..b851cc58085 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -241,7 +241,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); const struct batadv_hard_iface *hard_iface; - int min_mtu = ETH_DATA_LEN; + int min_mtu = INT_MAX; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -256,8 +256,6 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) } rcu_read_unlock(); - atomic_set(&bat_priv->packet_size_max, min_mtu); - if (atomic_read(&bat_priv->fragmentation) == 0) goto out; @@ -268,13 +266,21 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); min_mtu -= sizeof(struct batadv_frag_packet); min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; - atomic_set(&bat_priv->packet_size_max, min_mtu); - - /* with fragmentation enabled we can fragment external packets easily */ - min_mtu = min_t(int, min_mtu, ETH_DATA_LEN); out: - return min_mtu - batadv_max_header_len(); + /* report to the other components the maximum amount of bytes that + * batman-adv can send over the wire (without considering the payload + * overhead). For example, this value is used by TT to compute the + * maximum local table table size + */ + atomic_set(&bat_priv->packet_size_max, min_mtu); + + /* the real soft-interface MTU is computed by removing the payload + * overhead from the maximum amount of bytes that was just computed. + * + * However batman-adv does not support MTUs bigger than ETH_DATA_LEN + */ + return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); } /* adjusts the MTU if a new interface with a smaller MTU appeared. */ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index abb9d6e0388..161ef8f17d2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -158,6 +158,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; size_t packet_len = sizeof(struct batadv_icmp_packet); + uint8_t *addr; if (len < sizeof(struct batadv_icmp_header)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -227,10 +228,10 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto dst_unreach; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header; - if (packet_len == sizeof(*icmp_packet_rr)) - memcpy(icmp_packet_rr->rr, - neigh_node->if_incoming->net_dev->dev_addr, - ETH_ALEN); + if (packet_len == sizeof(*icmp_packet_rr)) { + addr = neigh_node->if_incoming->net_dev->dev_addr; + ether_addr_copy(icmp_packet_rr->rr[0], addr); + } break; default: @@ -250,7 +251,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - memcpy(icmp_header->orig, primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr); batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); goto out; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 66ae135b9f2..d1183e88216 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -34,6 +34,7 @@ #include "gateway_client.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "multicast.h" #include "gateway_common.h" #include "hash.h" #include "bat_algo.h" @@ -110,6 +111,9 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tt.last_changeset_lock); spin_lock_init(&bat_priv->tt.commit_lock); spin_lock_init(&bat_priv->gw.list_lock); +#ifdef CONFIG_BATMAN_ADV_MCAST + spin_lock_init(&bat_priv->mcast.want_lists_lock); +#endif spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); spin_lock_init(&bat_priv->softif_vlan_list_lock); @@ -117,9 +121,17 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); INIT_HLIST_HEAD(&bat_priv->gw.list); +#ifdef CONFIG_BATMAN_ADV_MCAST + INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list); + INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list); + INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv6_list); +#endif INIT_LIST_HEAD(&bat_priv->tt.changes_list); INIT_LIST_HEAD(&bat_priv->tt.req_list); INIT_LIST_HEAD(&bat_priv->tt.roam_list); +#ifdef CONFIG_BATMAN_ADV_MCAST + INIT_HLIST_HEAD(&bat_priv->mcast.mla_list); +#endif INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); @@ -145,6 +157,7 @@ int batadv_mesh_init(struct net_device *soft_iface) goto err; batadv_gw_init(bat_priv); + batadv_mcast_init(bat_priv); atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -169,6 +182,8 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); + batadv_mcast_free(bat_priv); + /* Free the TT and the originator tables only after having terminated * all the other depending components which may use these structures for * their purposes. @@ -1133,8 +1148,8 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, unicast_tvlv_packet->reserved = 0; unicast_tvlv_packet->tvlv_len = htons(tvlv_len); unicast_tvlv_packet->align = 0; - memcpy(unicast_tvlv_packet->src, src, ETH_ALEN); - memcpy(unicast_tvlv_packet->dst, dst, ETH_ALEN); + ether_addr_copy(unicast_tvlv_packet->src, src); + ether_addr_copy(unicast_tvlv_packet->dst, dst); tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 9374f1a5134..770dc890cee 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.1.0" +#define BATADV_SOURCE_VERSION "2014.2.0" #endif /* B.A.T.M.A.N. parameters */ @@ -176,6 +176,8 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/slab.h> #include <net/sock.h> /* struct sock */ +#include <net/addrconf.h> /* ipv6 address stuff */ +#include <linux/ip.h> #include <net/rtnetlink.h> #include <linux/jiffies.h> #include <linux/seq_file.h> diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c new file mode 100644 index 00000000000..8c7ca811de6 --- /dev/null +++ b/net/batman-adv/multicast.c @@ -0,0 +1,748 @@ +/* Copyright (C) 2014 B.A.T.M.A.N. contributors: + * + * Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "main.h" +#include "multicast.h" +#include "originator.h" +#include "hard-interface.h" +#include "translation-table.h" +#include "multicast.h" + +/** + * batadv_mcast_mla_softif_get - get softif multicast listeners + * @dev: the device to collect multicast addresses from + * @mcast_list: a list to put found addresses into + * + * Collect multicast addresses of the local multicast listeners + * on the given soft interface, dev, in the given mcast_list. + * + * Returns -ENOMEM on memory allocation error or the number of + * items added to the mcast_list otherwise. + */ +static int batadv_mcast_mla_softif_get(struct net_device *dev, + struct hlist_head *mcast_list) +{ + struct netdev_hw_addr *mc_list_entry; + struct batadv_hw_addr *new; + int ret = 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(mc_list_entry, dev) { + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + ret = -ENOMEM; + break; + } + + ether_addr_copy(new->addr, mc_list_entry->addr); + hlist_add_head(&new->list, mcast_list); + ret++; + } + netif_addr_unlock_bh(dev); + + return ret; +} + +/** + * batadv_mcast_mla_is_duplicate - check whether an address is in a list + * @mcast_addr: the multicast address to check + * @mcast_list: the list with multicast addresses to search in + * + * Returns true if the given address is already in the given list. + * Otherwise returns false. + */ +static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr, + struct hlist_head *mcast_list) +{ + struct batadv_hw_addr *mcast_entry; + + hlist_for_each_entry(mcast_entry, mcast_list, list) + if (batadv_compare_eth(mcast_entry->addr, mcast_addr)) + return true; + + return false; +} + +/** + * batadv_mcast_mla_list_free - free a list of multicast addresses + * @mcast_list: the list to free + * + * Removes and frees all items in the given mcast_list. + */ +static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) +{ + struct batadv_hw_addr *mcast_entry; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { + hlist_del(&mcast_entry->list); + kfree(mcast_entry); + } +} + +/** + * batadv_mcast_mla_tt_retract - clean up multicast listener announcements + * @bat_priv: the bat priv with all the soft interface information + * @mcast_list: a list of addresses which should _not_ be removed + * + * Retracts the announcement of any multicast listener from the + * translation table except the ones listed in the given mcast_list. + * + * If mcast_list is NULL then all are retracted. + */ +static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, + struct hlist_head *mcast_list) +{ + struct batadv_hw_addr *mcast_entry; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, + list) { + if (mcast_list && + batadv_mcast_mla_is_duplicate(mcast_entry->addr, + mcast_list)) + continue; + + batadv_tt_local_remove(bat_priv, mcast_entry->addr, + BATADV_NO_FLAGS, + "mcast TT outdated", false); + + hlist_del(&mcast_entry->list); + kfree(mcast_entry); + } +} + +/** + * batadv_mcast_mla_tt_add - add multicast listener announcements + * @bat_priv: the bat priv with all the soft interface information + * @mcast_list: a list of addresses which are going to get added + * + * Adds multicast listener announcements from the given mcast_list to the + * translation table if they have not been added yet. + */ +static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, + struct hlist_head *mcast_list) +{ + struct batadv_hw_addr *mcast_entry; + struct hlist_node *tmp; + + if (!mcast_list) + return; + + hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { + if (batadv_mcast_mla_is_duplicate(mcast_entry->addr, + &bat_priv->mcast.mla_list)) + continue; + + if (!batadv_tt_local_add(bat_priv->soft_iface, + mcast_entry->addr, BATADV_NO_FLAGS, + BATADV_NULL_IFINDEX, BATADV_NO_MARK)) + continue; + + hlist_del(&mcast_entry->list); + hlist_add_head(&mcast_entry->list, &bat_priv->mcast.mla_list); + } +} + +/** + * batadv_mcast_has_bridge - check whether the soft-iface is bridged + * @bat_priv: the bat priv with all the soft interface information + * + * Checks whether there is a bridge on top of our soft interface. Returns + * true if so, false otherwise. + */ +static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) +{ + struct net_device *upper = bat_priv->soft_iface; + + rcu_read_lock(); + do { + upper = netdev_master_upper_dev_get_rcu(upper); + } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); + rcu_read_unlock(); + + return upper; +} + +/** + * batadv_mcast_mla_tvlv_update - update multicast tvlv + * @bat_priv: the bat priv with all the soft interface information + * + * Updates the own multicast tvlv with our current multicast related settings, + * capabilities and inabilities. + * + * Returns true if the tvlv container is registered afterwards. Otherwise + * returns false. + */ +static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) +{ + struct batadv_tvlv_mcast_data mcast_data; + + mcast_data.flags = BATADV_NO_FLAGS; + memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved)); + + /* Avoid attaching MLAs, if there is a bridge on top of our soft + * interface, we don't support that yet (TODO) + */ + if (batadv_mcast_has_bridge(bat_priv)) { + if (bat_priv->mcast.enabled) { + batadv_tvlv_container_unregister(bat_priv, + BATADV_TVLV_MCAST, 1); + bat_priv->mcast.enabled = false; + } + + return false; + } + + if (!bat_priv->mcast.enabled || + mcast_data.flags != bat_priv->mcast.flags) { + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1, + &mcast_data, sizeof(mcast_data)); + bat_priv->mcast.flags = mcast_data.flags; + bat_priv->mcast.enabled = true; + } + + return true; +} + +/** + * batadv_mcast_mla_update - update the own MLAs + * @bat_priv: the bat priv with all the soft interface information + * + * Updates the own multicast listener announcements in the translation + * table as well as the own, announced multicast tvlv container. + */ +void batadv_mcast_mla_update(struct batadv_priv *bat_priv) +{ + struct net_device *soft_iface = bat_priv->soft_iface; + struct hlist_head mcast_list = HLIST_HEAD_INIT; + int ret; + + if (!batadv_mcast_mla_tvlv_update(bat_priv)) + goto update; + + ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list); + if (ret < 0) + goto out; + +update: + batadv_mcast_mla_tt_retract(bat_priv, &mcast_list); + batadv_mcast_mla_tt_add(bat_priv, &mcast_list); + +out: + batadv_mcast_mla_list_free(&mcast_list); +} + +/** + * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential + * @bat_priv: the bat priv with all the soft interface information + * @skb: the IPv4 packet to check + * @is_unsnoopable: stores whether the destination is snoopable + * + * Checks whether the given IPv4 packet has the potential to be forwarded with a + * mode more optimal than classic flooding. + * + * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of + * memory allocation failure. + */ +static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, + struct sk_buff *skb, + bool *is_unsnoopable) +{ + struct iphdr *iphdr; + + /* We might fail due to out-of-memory -> drop it */ + if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr))) + return -ENOMEM; + + iphdr = ip_hdr(skb); + + /* TODO: Implement Multicast Router Discovery (RFC4286), + * then allow scope > link local, too + */ + if (!ipv4_is_local_multicast(iphdr->daddr)) + return -EINVAL; + + /* link-local multicast listeners behind a bridge are + * not snoopable (see RFC4541, section 2.1.2.2) + */ + *is_unsnoopable = true; + + return 0; +} + +/** + * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential + * @bat_priv: the bat priv with all the soft interface information + * @skb: the IPv6 packet to check + * @is_unsnoopable: stores whether the destination is snoopable + * + * Checks whether the given IPv6 packet has the potential to be forwarded with a + * mode more optimal than classic flooding. + * + * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out + * of memory. + */ +static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, + struct sk_buff *skb, + bool *is_unsnoopable) +{ + struct ipv6hdr *ip6hdr; + + /* We might fail due to out-of-memory -> drop it */ + if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr))) + return -ENOMEM; + + ip6hdr = ipv6_hdr(skb); + + /* TODO: Implement Multicast Router Discovery (RFC4286), + * then allow scope > link local, too + */ + if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) != IPV6_ADDR_SCOPE_LINKLOCAL) + return -EINVAL; + + /* link-local-all-nodes multicast listeners behind a bridge are + * not snoopable (see RFC4541, section 3, paragraph 3) + */ + if (ipv6_addr_is_ll_all_nodes(&ip6hdr->daddr)) + *is_unsnoopable = true; + + return 0; +} + +/** + * batadv_mcast_forw_mode_check - check for optimized forwarding potential + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast frame to check + * @is_unsnoopable: stores whether the destination is snoopable + * + * Checks whether the given multicast ethernet frame has the potential to be + * forwarded with a mode more optimal than classic flooding. + * + * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out + * of memory. + */ +static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, + struct sk_buff *skb, + bool *is_unsnoopable) +{ + struct ethhdr *ethhdr = eth_hdr(skb); + + if (!atomic_read(&bat_priv->multicast_mode)) + return -EINVAL; + + if (atomic_read(&bat_priv->mcast.num_disabled)) + return -EINVAL; + + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_IP: + return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, + is_unsnoopable); + case ETH_P_IPV6: + return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, + is_unsnoopable); + default: + return -EINVAL; + } +} + +/** + * batadv_mcast_want_all_ip_count - count nodes with unspecific mcast interest + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: ethernet header of a packet + * + * Returns the number of nodes which want all IPv4 multicast traffic if the + * given ethhdr is from an IPv4 packet or the number of nodes which want all + * IPv6 traffic if it matches an IPv6 packet. + */ +static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr) +{ + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_IP: + return atomic_read(&bat_priv->mcast.num_want_all_ipv4); + case ETH_P_IPV6: + return atomic_read(&bat_priv->mcast.num_want_all_ipv6); + default: + /* we shouldn't be here... */ + return 0; + } +} + +/** + * batadv_mcast_forw_tt_node_get - get a multicast tt node + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: the ether header containing the multicast destination + * + * Returns an orig_node matching the multicast address provided by ethhdr + * via a translation table lookup. This increases the returned nodes refcount. + */ +static struct batadv_orig_node * +batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr) +{ + return batadv_transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest, BATADV_NO_FLAGS); +} + +/** + * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag + * @bat_priv: the bat priv with all the soft interface information + * + * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and + * increases its refcount. + */ +static struct batadv_orig_node * +batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) +{ + struct batadv_orig_node *tmp_orig_node, *orig_node = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_orig_node, + &bat_priv->mcast.want_all_ipv4_list, + mcast_want_all_ipv4_node) { + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + orig_node = tmp_orig_node; + break; + } + rcu_read_unlock(); + + return orig_node; +} + +/** + * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag + * @bat_priv: the bat priv with all the soft interface information + * + * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set + * and increases its refcount. + */ +static struct batadv_orig_node * +batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) +{ + struct batadv_orig_node *tmp_orig_node, *orig_node = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_orig_node, + &bat_priv->mcast.want_all_ipv6_list, + mcast_want_all_ipv6_node) { + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + orig_node = tmp_orig_node; + break; + } + rcu_read_unlock(); + + return orig_node; +} + +/** + * batadv_mcast_want_forw_ip_node_get - get a node with an ipv4/ipv6 flag + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: an ethernet header to determine the protocol family from + * + * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or + * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and + * increases its refcount. + */ +static struct batadv_orig_node * +batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr) +{ + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_IP: + return batadv_mcast_forw_ipv4_node_get(bat_priv); + case ETH_P_IPV6: + return batadv_mcast_forw_ipv6_node_get(bat_priv); + default: + /* we shouldn't be here... */ + return NULL; + } +} + +/** + * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag + * @bat_priv: the bat priv with all the soft interface information + * + * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag + * set and increases its refcount. + */ +static struct batadv_orig_node * +batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) +{ + struct batadv_orig_node *tmp_orig_node, *orig_node = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_orig_node, + &bat_priv->mcast.want_all_unsnoopables_list, + mcast_want_all_unsnoopables_node) { + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + orig_node = tmp_orig_node; + break; + } + rcu_read_unlock(); + + return orig_node; +} + +/** + * batadv_mcast_forw_mode - check on how to forward a multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: The multicast packet to check + * @orig: an originator to be set to forward the skb to + * + * Returns the forwarding mode as enum batadv_forw_mode and in case of + * BATADV_FORW_SINGLE set the orig to the single originator the skb + * should be forwarded to. + */ +enum batadv_forw_mode +batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_orig_node **orig) +{ + int ret, tt_count, ip_count, unsnoop_count, total_count; + bool is_unsnoopable = false; + struct ethhdr *ethhdr; + + ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable); + if (ret == -ENOMEM) + return BATADV_FORW_NONE; + else if (ret < 0) + return BATADV_FORW_ALL; + + ethhdr = eth_hdr(skb); + + tt_count = batadv_tt_global_hash_count(bat_priv, ethhdr->h_dest, + BATADV_NO_FLAGS); + ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); + unsnoop_count = !is_unsnoopable ? 0 : + atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); + + total_count = tt_count + ip_count + unsnoop_count; + + switch (total_count) { + case 1: + if (tt_count) + *orig = batadv_mcast_forw_tt_node_get(bat_priv, ethhdr); + else if (ip_count) + *orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr); + else if (unsnoop_count) + *orig = batadv_mcast_forw_unsnoop_node_get(bat_priv); + + if (*orig) + return BATADV_FORW_SINGLE; + + /* fall through */ + case 0: + return BATADV_FORW_NONE; + default: + return BATADV_FORW_ALL; + } +} + +/** + * batadv_mcast_want_unsnoop_update - update unsnoop counter and list + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, + * orig, has toggled then this method updates counter and list accordingly. + */ +static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t mcast_flags) +{ + /* switched from flag unset to set */ + if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && + !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) { + atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_add_head_rcu(&orig->mcast_want_all_unsnoopables_node, + &bat_priv->mcast.want_all_unsnoopables_list); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + /* switched from flag set to unset */ + } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) && + orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) { + atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_del_rcu(&orig->mcast_want_all_unsnoopables_node); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + } +} + +/** + * batadv_mcast_want_ipv4_update - update want-all-ipv4 counter and list + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has + * toggled then this method updates counter and list accordingly. + */ +static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t mcast_flags) +{ + /* switched from flag unset to set */ + if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 && + !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) { + atomic_inc(&bat_priv->mcast.num_want_all_ipv4); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_add_head_rcu(&orig->mcast_want_all_ipv4_node, + &bat_priv->mcast.want_all_ipv4_list); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + /* switched from flag set to unset */ + } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) && + orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) { + atomic_dec(&bat_priv->mcast.num_want_all_ipv4); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_del_rcu(&orig->mcast_want_all_ipv4_node); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + } +} + +/** + * batadv_mcast_want_ipv6_update - update want-all-ipv6 counter and list + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has + * toggled then this method updates counter and list accordingly. + */ +static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t mcast_flags) +{ + /* switched from flag unset to set */ + if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 && + !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) { + atomic_inc(&bat_priv->mcast.num_want_all_ipv6); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_add_head_rcu(&orig->mcast_want_all_ipv6_node, + &bat_priv->mcast.want_all_ipv6_list); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + /* switched from flag set to unset */ + } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) && + orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) { + atomic_dec(&bat_priv->mcast.num_want_all_ipv6); + + spin_lock_bh(&bat_priv->mcast.want_lists_lock); + hlist_del_rcu(&orig->mcast_want_all_ipv6_node); + spin_unlock_bh(&bat_priv->mcast.want_lists_lock); + } +} + +/** + * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) + * @tvlv_value: tvlv buffer containing the multicast data + * @tvlv_value_len: tvlv buffer length + */ +static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + uint8_t mcast_flags = BATADV_NO_FLAGS; + bool orig_initialized; + + orig_initialized = orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST; + + /* If mcast support is turned on decrease the disabled mcast node + * counter only if we had increased it for this node before. If this + * is a completely new orig_node no need to decrease the counter. + */ + if (orig_mcast_enabled && + !(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) { + if (orig_initialized) + atomic_dec(&bat_priv->mcast.num_disabled); + orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; + /* If mcast support is being switched off increase the disabled + * mcast node counter. + */ + } else if (!orig_mcast_enabled && + orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) { + atomic_inc(&bat_priv->mcast.num_disabled); + orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; + } + + orig->capa_initialized |= BATADV_ORIG_CAPA_HAS_MCAST; + + if (orig_mcast_enabled && tvlv_value && + (tvlv_value_len >= sizeof(mcast_flags))) + mcast_flags = *(uint8_t *)tvlv_value; + + batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags); + batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags); + batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); + + orig->mcast_flags = mcast_flags; +} + +/** + * batadv_mcast_init - initialize the multicast optimizations structures + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_mcast_init(struct batadv_priv *bat_priv) +{ + batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1, + NULL, BATADV_TVLV_MCAST, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); +} + +/** + * batadv_mcast_free - free the multicast optimizations structures + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_mcast_free(struct batadv_priv *bat_priv) +{ + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1); + + batadv_mcast_mla_tt_retract(bat_priv, NULL); +} + +/** + * batadv_mcast_purge_orig - reset originator global mcast state modifications + * @orig: the originator which is going to get purged + */ +void batadv_mcast_purge_orig(struct batadv_orig_node *orig) +{ + struct batadv_priv *bat_priv = orig->bat_priv; + + if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) + atomic_dec(&bat_priv->mcast.num_disabled); + + batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); + batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); + batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); +} diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h new file mode 100644 index 00000000000..73b5d45819c --- /dev/null +++ b/net/batman-adv/multicast.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2014 B.A.T.M.A.N. contributors: + * + * Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_MULTICAST_H_ +#define _NET_BATMAN_ADV_MULTICAST_H_ + +/** + * batadv_forw_mode - the way a packet should be forwarded as + * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic + * flooding) + * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the + * BATMAN unicast routing protocol) + * @BATADV_FORW_NONE: don't forward, drop it + */ +enum batadv_forw_mode { + BATADV_FORW_ALL, + BATADV_FORW_SINGLE, + BATADV_FORW_NONE, +}; + +#ifdef CONFIG_BATMAN_ADV_MCAST + +void batadv_mcast_mla_update(struct batadv_priv *bat_priv); + +enum batadv_forw_mode +batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_orig_node **mcast_single_orig); + +void batadv_mcast_init(struct batadv_priv *bat_priv); + +void batadv_mcast_free(struct batadv_priv *bat_priv); + +void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); + +#else + +static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) +{ + return; +} + +static inline enum batadv_forw_mode +batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_orig_node **mcast_single_orig) +{ + return BATADV_FORW_ALL; +} + +static inline int batadv_mcast_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_mcast_free(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) +{ + return; +} + +#endif /* CONFIG_BATMAN_ADV_MCAST */ + +#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index f1b604d88dc..a9546fe541e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -819,7 +819,7 @@ static struct batadv_nc_node /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); - memcpy(nc_node->addr, orig_node->orig, ETH_ALEN); + ether_addr_copy(nc_node->addr, orig_node->orig); nc_node->orig_node = orig_neigh_node; atomic_set(&nc_node->refcount, 2); @@ -941,8 +941,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, spin_lock_init(&nc_path->packet_list_lock); atomic_set(&nc_path->refcount, 2); nc_path->last_valid = jiffies; - memcpy(nc_path->next_hop, dst, ETH_ALEN); - memcpy(nc_path->prev_hop, src, ETH_ALEN); + ether_addr_copy(nc_path->next_hop, dst); + ether_addr_copy(nc_path->prev_hop, src); batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n", nc_path->prev_hop, @@ -1114,15 +1114,15 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ - memcpy(coded_packet->first_source, first_source, ETH_ALEN); - memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN); + ether_addr_copy(coded_packet->first_source, first_source); + ether_addr_copy(coded_packet->first_orig_dest, packet1->dest); coded_packet->first_crc = packet_id1; coded_packet->first_ttvn = packet1->ttvn; /* Info about second unicast packet */ - memcpy(coded_packet->second_dest, second_dest, ETH_ALEN); - memcpy(coded_packet->second_source, second_source, ETH_ALEN); - memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); + ether_addr_copy(coded_packet->second_dest, second_dest); + ether_addr_copy(coded_packet->second_source, second_source); + ether_addr_copy(coded_packet->second_orig_dest, packet2->dest); coded_packet->second_crc = packet_id2; coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; @@ -1349,8 +1349,8 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, /* Set the mac header as if we actually sent the packet uncoded */ ethhdr = eth_hdr(skb); - memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); - memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); + ether_addr_copy(ethhdr->h_source, ethhdr->h_dest); + ether_addr_copy(ethhdr->h_dest, eth_dst_new); /* Set data pointer to MAC header to mimic packets from our tx path */ skb_push(skb, ETH_HLEN); @@ -1636,7 +1636,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /* Reconstruct original mac header */ ethhdr = eth_hdr(skb); - memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); + *ethhdr = ethhdr_tmp; /* Select the correct unicast header information based on the location * of our mac address in the coded_packet header @@ -1646,7 +1646,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, * so the Ethernet address must be copied to h_dest and * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST */ - memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN); + ether_addr_copy(ethhdr->h_dest, coded_packet_tmp.second_dest); skb->pkt_type = PACKET_HOST; orig_dest = coded_packet_tmp.second_orig_dest; @@ -1682,7 +1682,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, unicast_packet->packet_type = BATADV_UNICAST; unicast_packet->version = BATADV_COMPAT_VERSION; unicast_packet->ttl = ttl; - memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); + ether_addr_copy(unicast_packet->dest, orig_dest); unicast_packet->ttvn = ttvn; batadv_nc_packet_free(nc_packet); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6df12a2e360..ffd9dfbd9b0 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -27,6 +27,7 @@ #include "bridge_loop_avoidance.h" #include "network-coding.h" #include "fragmentation.h" +#include "multicast.h" /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; @@ -446,7 +447,7 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, INIT_HLIST_HEAD(&neigh_node->ifinfo_list); spin_lock_init(&neigh_node->ifinfo_lock); - memcpy(neigh_node->addr, neigh_addr, ETH_ALEN); + ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; @@ -458,6 +459,42 @@ out: } /** + * batadv_neigh_node_get - retrieve a neighbour from the list + * @orig_node: originator which the neighbour belongs to + * @hard_iface: the interface where this neighbour is connected to + * @addr: the address of the neighbour + * + * Looks for and possibly returns a neighbour belonging to this originator list + * which is connected through the provided hard interface. + * Returns NULL if the neighbour is not found. + */ +struct batadv_neigh_node * +batadv_neigh_node_get(const struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *hard_iface, + const uint8_t *addr) +{ + struct batadv_neigh_node *tmp_neigh_node, *res = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { + if (!batadv_compare_eth(tmp_neigh_node->addr, addr)) + continue; + + if (tmp_neigh_node->if_incoming != hard_iface) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + res = tmp_neigh_node; + break; + } + rcu_read_unlock(); + + return res; +} + +/** * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object * @rcu: rcu pointer of the orig_ifinfo object */ @@ -521,6 +558,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_mcast_purge_orig(orig_node); + /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); @@ -628,15 +667,17 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, /* extra reference for return */ atomic_set(&orig_node->refcount, 2); - orig_node->tt_initialised = false; orig_node->bat_priv = bat_priv; - memcpy(orig_node->orig, addr, ETH_ALEN); + ether_addr_copy(orig_node->orig, addr); batadv_dat_init_orig_node_addr(orig_node); atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; +#ifdef CONFIG_BATMAN_ADV_MCAST + orig_node->mcast_flags = BATADV_NO_FLAGS; +#endif /* create a vlan object for the "untagged" LAN */ vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 37be290f63f..db3a9ed734c 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -29,6 +29,10 @@ void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * +batadv_neigh_node_get(const struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *hard_iface, + const uint8_t *addr); +struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 0a381d1174c..34e096d2dce 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -89,6 +89,19 @@ enum batadv_icmp_packettype { BATADV_PARAMETER_PROBLEM = 12, }; +/** + * enum batadv_mcast_flags - flags for multicast capabilities and settings + * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for + * 224.0.0.0/24 or ff02::1 + * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets + * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets + */ +enum batadv_mcast_flags { + BATADV_MCAST_WANT_ALL_UNSNOOPABLES = BIT(0), + BATADV_MCAST_WANT_ALL_IPV4 = BIT(1), + BATADV_MCAST_WANT_ALL_IPV6 = BIT(2), +}; + /* tt data subtypes */ #define BATADV_TT_DATA_TYPE_MASK 0x0F @@ -106,10 +119,30 @@ enum batadv_tt_data_flags { BATADV_TT_FULL_TABLE = BIT(4), }; -/* BATADV_TT_CLIENT flags. - * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to - * BIT(15) are used for local computation only. - * Flags from BIT(4) to BIT(7) are kept in sync with the rest of the network. +/** + * enum batadv_tt_client_flags - TT client specific flags + * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table + * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new + * update telling its new real location has not been received/sent yet + * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. + * This information is used by the "AP Isolation" feature + * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This + * information is used by the Extended Isolation feature + * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table + * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has + * not been announced yet + * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept + * in the table for one more originator interval for consistency purposes + * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of + * the network but no nnode has already announced it + * + * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. + * Bits from 8 to 15 are called _local flags_ because they are used for local + * computations only. + * + * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with + * the other nodes in the network. To achieve this goal these flags are included + * in the TT CRC computation. */ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), @@ -145,6 +178,7 @@ enum batadv_bla_claimframe { * @BATADV_TVLV_NC: network coding tvlv * @BATADV_TVLV_TT: translation table tvlv * @BATADV_TVLV_ROAM: roaming advertisement tvlv + * @BATADV_TVLV_MCAST: multicast capability tvlv */ enum batadv_tvlv_type { BATADV_TVLV_GW = 0x01, @@ -152,6 +186,7 @@ enum batadv_tvlv_type { BATADV_TVLV_NC = 0x03, BATADV_TVLV_TT = 0x04, BATADV_TVLV_ROAM = 0x05, + BATADV_TVLV_MCAST = 0x06, }; #pragma pack(2) @@ -504,4 +539,14 @@ struct batadv_tvlv_roam_adv { __be16 vid; }; +/** + * struct batadv_tvlv_mcast_data - payload of a multicast tvlv + * @flags: multicast flags announced by the orig node + * @reserved: reserved field + */ +struct batadv_tvlv_mcast_data { + uint8_t flags; + uint8_t reserved[3]; +}; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1ed9f7c9ece..35141534938 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -222,8 +222,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph = (struct batadv_icmp_header *)skb->data; - memcpy(icmph->dst, icmph->orig, ETH_ALEN); - memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(icmph->dst, icmph->orig); + ether_addr_copy(icmph->orig, primary_if->net_dev->dev_addr); icmph->msg_type = BATADV_ECHO_REPLY; icmph->ttl = BATADV_TTL; @@ -276,9 +276,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, - ETH_ALEN); + ether_addr_copy(icmp_packet->dst, icmp_packet->orig); + ether_addr_copy(icmp_packet->orig, primary_if->net_dev->dev_addr); icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->ttl = BATADV_TTL; @@ -341,8 +340,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) goto out; - memcpy(&(icmp_packet_rr->rr[icmp_packet_rr->rr_cur]), - ethhdr->h_dest, ETH_ALEN); + ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur], + ethhdr->h_dest); icmp_packet_rr->rr_cur++; } @@ -664,7 +663,7 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, } /* update the packet header */ - memcpy(unicast_packet->dest, orig_addr, ETH_ALEN); + ether_addr_copy(unicast_packet->dest, orig_addr); unicast_packet->ttvn = orig_ttvn; ret = true; @@ -688,7 +687,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, int is_old_ttvn; /* check if there is enough data before accessing it */ - if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0) + if (!pskb_may_pull(skb, hdr_len + ETH_HLEN)) return 0; /* create a copy of the skb (in case of for re-routing) to modify it. */ @@ -774,7 +773,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, if (!primary_if) return 0; - memcpy(unicast_packet->dest, primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); batadv_hardif_free_ref(primary_if); @@ -918,6 +917,8 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, if (ret != NET_RX_SUCCESS) ret = batadv_route_unicast_packet(skb, recv_if); + else + consume_skb(skb); return ret; } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 579f5f00a38..3d64ed20c39 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -27,6 +27,7 @@ #include "originator.h" #include "network-coding.h" #include "fragmentation.h" +#include "multicast.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); @@ -59,8 +60,8 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); - memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); - memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); + ether_addr_copy(ethhdr->h_source, hard_iface->net_dev->dev_addr); + ether_addr_copy(ethhdr->h_dest, dst_addr); ethhdr->h_proto = htons(ETH_P_BATMAN); skb_set_network_header(skb, ETH_HLEN); @@ -165,7 +166,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, /* set unicast ttl */ unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ - memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + ether_addr_copy(unicast_packet->dest, orig_node->orig); /* set the destination tt version number */ unicast_packet->ttvn = ttvn; @@ -220,7 +221,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; - memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(uc_4addr_packet->src, primary_if->net_dev->dev_addr); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; @@ -248,13 +249,13 @@ out: * * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ -static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, - struct sk_buff *skb, int packet_type, - int packet_subtype, - struct batadv_orig_node *orig_node, - unsigned short vid) +int batadv_send_skb_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype, + struct batadv_orig_node *orig_node, + unsigned short vid) { - struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct ethhdr *ethhdr; struct batadv_unicast_packet *unicast_packet; int ret = NET_XMIT_DROP; @@ -279,6 +280,10 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, goto out; } + /* skb->data might have been reallocated by + * batadv_send_skb_prepare_unicast{,_4addr}() + */ + ethhdr = eth_hdr(skb); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route @@ -307,6 +312,7 @@ out: * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) + * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table * * Look up the recipient node for the destination address in the ethernet diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index aaddaa9661c..38d0ec1833a 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -36,6 +36,11 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig_node, int packet_subtype); +int batadv_send_skb_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype, + struct batadv_orig_node *orig_node, + unsigned short vid); int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, int packet_subtype, uint8_t *dst_hint, @@ -47,6 +52,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, * batadv_send_skb_via_tt - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send + * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table * * Look up the recipient node for the destination address in the ethernet @@ -68,6 +74,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send * @packet_subtype: the unicast 4addr packet subtype to use + * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table * * Look up the recipient node for the destination address in the ethernet diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index f82c267e188..744a59b85e1 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -32,6 +32,7 @@ #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> +#include "multicast.h" #include "bridge_loop_avoidance.h" #include "network-coding.h" @@ -111,8 +112,8 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(old_addr, dev->dev_addr, ETH_ALEN); - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + ether_addr_copy(old_addr, dev->dev_addr); + ether_addr_copy(dev->dev_addr, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { @@ -170,17 +171,19 @@ static int batadv_interface_tx(struct sk_buff *skb, unsigned short vid; uint32_t seqno; int gw_mode; + enum batadv_forw_mode forw_mode; + struct batadv_orig_node *mcast_single_orig = NULL; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; soft_iface->trans_start = jiffies; vid = batadv_get_vid(skb, 0); - ethhdr = (struct ethhdr *)skb->data; + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: - vhdr = (struct vlan_ethhdr *)skb->data; + vhdr = vlan_eth_hdr(skb); if (vhdr->h_vlan_encapsulated_proto != ethertype) break; @@ -194,7 +197,7 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; /* skb->data might have been reallocated by batadv_bla_tx() */ - ethhdr = (struct ethhdr *)skb->data; + ethhdr = eth_hdr(skb); /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source)) { @@ -230,7 +233,7 @@ static int batadv_interface_tx(struct sk_buff *skb, /* skb->data may have been modified by * batadv_gw_dhcp_recipient_get() */ - ethhdr = (struct ethhdr *)skb->data; + ethhdr = eth_hdr(skb); /* if gw_mode is on, broadcast any non-DHCP message. * All the DHCP packets are going to be sent as unicast */ @@ -247,9 +250,19 @@ static int batadv_interface_tx(struct sk_buff *skb, * directed to a DHCP server */ goto dropped; - } send: + if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { + forw_mode = batadv_mcast_forw_mode(bat_priv, skb, + &mcast_single_orig); + if (forw_mode == BATADV_FORW_NONE) + goto dropped; + + if (forw_mode == BATADV_FORW_SINGLE) + do_bcast = false; + } + } + batadv_skb_set_priority(skb, 0); /* ethernet packet should be broadcasted */ @@ -279,8 +292,8 @@ send: /* hw address of first interface is the orig mac because only * this mac is known throughout the mesh */ - memcpy(bcast_packet->orig, - primary_if->net_dev->dev_addr, ETH_ALEN); + ether_addr_copy(bcast_packet->orig, + primary_if->net_dev->dev_addr); /* set broadcast sequence number */ seqno = atomic_inc_return(&bat_priv->bcast_seqno); @@ -301,6 +314,10 @@ send: if (ret) goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); + } else if (mcast_single_orig) { + ret = batadv_send_skb_unicast(bat_priv, skb, + BATADV_UNICAST, 0, + mcast_single_orig, vid); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) @@ -652,10 +669,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work) } batadv_sysfs_del_meshif(soft_iface); - - rtnl_lock(); - unregister_netdevice(soft_iface); - rtnl_unlock(); + unregister_netdev(soft_iface); } /** @@ -692,6 +706,14 @@ static int batadv_softif_init_late(struct net_device *dev) #ifdef CONFIG_BATMAN_ADV_DAT atomic_set(&bat_priv->distributed_arp_table, 1); #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + bat_priv->mcast.flags = BATADV_NO_FLAGS; + atomic_set(&bat_priv->multicast_mode, 1); + atomic_set(&bat_priv->mcast.num_disabled, 0); + atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); + atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); + atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); +#endif atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index e456bf6bb28..1ebb0d9e2ea 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -539,6 +539,9 @@ BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); +#ifdef CONFIG_BATMAN_ADV_MCAST +BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL); +#endif #ifdef CONFIG_BATMAN_ADV_DEBUG BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); #endif @@ -558,6 +561,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_DAT &batadv_attr_distributed_arp_table, #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + &batadv_attr_multicast_mode, +#endif &batadv_attr_fragmentation, &batadv_attr_routing_algo, &batadv_attr_gw_mode, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b6071f675a3..d636bde72c9 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -24,6 +24,7 @@ #include "originator.h" #include "routing.h" #include "bridge_loop_avoidance.h" +#include "multicast.h" #include <linux/crc32c.h> @@ -96,7 +97,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr, if (!hash) return NULL; - memcpy(to_search.addr, addr, ETH_ALEN); + ether_addr_copy(to_search.addr, addr); to_search.vid = vid; index = batadv_choose_tt(&to_search, hash->size); @@ -192,6 +193,31 @@ batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) } } +/** + * batadv_tt_global_hash_count - count the number of orig entries + * @hash: hash table containing the tt entries + * @addr: the mac address of the client to count entries for + * @vid: VLAN identifier + * + * Return the number of originators advertising the given address/data + * (excluding ourself). + */ +int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid) +{ + struct batadv_tt_global_entry *tt_global_entry; + int count; + + tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt_global_entry) + return 0; + + count = atomic_read(&tt_global_entry->orig_list_count); + batadv_tt_global_entry_free_ref(tt_global_entry); + + return count; +} + static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) { struct batadv_tt_orig_list_entry *orig_entry; @@ -333,7 +359,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, tt_change_node->change.flags = flags; memset(tt_change_node->change.reserved, 0, sizeof(tt_change_node->change.reserved)); - memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); + ether_addr_copy(tt_change_node->change.addr, common->addr); tt_change_node->change.vid = htons(common->vid); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -484,7 +510,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; - struct batadv_tt_global_entry *tt_global; + struct batadv_tt_global_entry *tt_global = NULL; struct net_device *in_dev = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; @@ -497,7 +523,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, in_dev = dev_get_by_index(&init_net, ifindex); tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); - tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); + + if (!is_multicast_ether_addr(addr)) + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); if (tt_local) { tt_local->last_seen = jiffies; @@ -549,7 +577,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, addr, BATADV_PRINT_VID(vid), (uint8_t)atomic_read(&bat_priv->tt.vn)); - memcpy(tt_local->common.addr, addr, ETH_ALEN); + ether_addr_copy(tt_local->common.addr, addr); /* The local entry has to be marked as NEW to avoid to send it in * a full table response going out before the next ttvn increment * (consistency check) @@ -562,8 +590,11 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; - /* the batman interface mac address should never be purged */ - if (batadv_compare_eth(addr, soft_iface->dev_addr)) + /* the batman interface mac and multicast addresses should never be + * purged + */ + if (batadv_compare_eth(addr, soft_iface->dev_addr) || + is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, @@ -1219,6 +1250,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); spin_unlock_bh(&tt_global->list_lock); + atomic_inc(&tt_global->orig_list_count); + out: if (orig_entry) batadv_tt_orig_list_entry_free_ref(orig_entry); @@ -1277,7 +1310,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, goto out; common = &tt_global_entry->common; - memcpy(common->addr, tt_addr, ETH_ALEN); + ether_addr_copy(common->addr, tt_addr); common->vid = vid; common->flags = flags; @@ -1292,6 +1325,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); + atomic_set(&tt_global_entry->orig_list_count, 0); spin_lock_init(&tt_global_entry->list_lock); hash_added = batadv_hash_add(bat_priv->tt.global_hash, @@ -1361,6 +1395,11 @@ add_orig_entry: ret = true; out_remove: + /* Do not remove multicast addresses from the local hash on + * global additions + */ + if (is_multicast_ether_addr(tt_addr)) + goto out; /* remove address from local hash if present */ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, vid, @@ -1552,6 +1591,25 @@ out: return 0; } +/** + * batadv_tt_global_del_orig_entry - remove and free an orig_entry + * @tt_global_entry: the global entry to remove the orig_entry from + * @orig_entry: the orig entry to remove and free + * + * Remove an orig_entry from its list in the given tt_global_entry and + * free this orig_entry afterwards. + */ +static void +batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry, + struct batadv_tt_orig_list_entry *orig_entry) +{ + batadv_tt_global_size_dec(orig_entry->orig_node, + tt_global_entry->common.vid); + atomic_dec(&tt_global_entry->orig_list_count); + hlist_del_rcu(&orig_entry->list); + batadv_tt_orig_list_entry_free_ref(orig_entry); +} + /* deletes the orig list of a tt_global_entry */ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) @@ -1562,20 +1620,26 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, safe, head, list) { - hlist_del_rcu(&orig_entry->list); - batadv_tt_global_size_dec(orig_entry->orig_node, - tt_global_entry->common.vid); - batadv_tt_orig_list_entry_free_ref(orig_entry); - } + hlist_for_each_entry_safe(orig_entry, safe, head, list) + batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); spin_unlock_bh(&tt_global_entry->list_lock); } +/** + * batadv_tt_global_del_orig_node - remove orig_node from a global tt entry + * @bat_priv: the bat priv with all the soft interface information + * @tt_global_entry: the global entry to remove the orig_node from + * @orig_node: the originator announcing the client + * @message: message to append to the log on deletion + * + * Remove the given orig_node and its according orig_entry from the given + * global tt entry. + */ static void -batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, - struct batadv_tt_global_entry *tt_global_entry, - struct batadv_orig_node *orig_node, - const char *message) +batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, + struct batadv_tt_global_entry *tt_global_entry, + struct batadv_orig_node *orig_node, + const char *message) { struct hlist_head *head; struct hlist_node *safe; @@ -1592,10 +1656,8 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, orig_node->orig, tt_global_entry->common.addr, BATADV_PRINT_VID(vid), message); - hlist_del_rcu(&orig_entry->list); - batadv_tt_global_size_dec(orig_node, - tt_global_entry->common.vid); - batadv_tt_orig_list_entry_free_ref(orig_entry); + batadv_tt_global_del_orig_entry(tt_global_entry, + orig_entry); } } spin_unlock_bh(&tt_global_entry->list_lock); @@ -1637,8 +1699,8 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /* there is another entry, we can simply delete this * one and can still use the other one. */ - batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry, - orig_node, message); + batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, + orig_node, message); } /** @@ -1664,8 +1726,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, goto out; if (!roaming) { - batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry, - orig_node, message); + batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, + orig_node, message); if (hlist_empty(&tt_global_entry->orig_list)) batadv_tt_global_free(bat_priv, tt_global_entry, @@ -1748,8 +1810,8 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry, common); - batadv_tt_global_del_orig_entry(bat_priv, tt_global, - orig_node, message); + batadv_tt_global_del_orig_node(bat_priv, tt_global, + orig_node, message); if (hlist_empty(&tt_global->orig_list)) { vid = tt_global->common.vid; @@ -1763,7 +1825,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, } spin_unlock_bh(list_lock); } - orig_node->tt_initialised = false; + orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT; } static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, @@ -1975,6 +2037,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct hlist_head *head; uint32_t i, crc_tmp, crc = 0; uint8_t flags; + __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2011,8 +2074,11 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, orig_node)) continue; - crc_tmp = crc32c(0, &tt_common->vid, - sizeof(tt_common->vid)); + /* use network order to read the VID: this ensures that + * every node reads the bytes in the same order. + */ + tmp_vid = htons(tt_common->vid); + crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes @@ -2046,6 +2112,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, struct hlist_head *head; uint32_t i, crc_tmp, crc = 0; uint8_t flags; + __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2064,8 +2131,11 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, if (tt_common->flags & BATADV_TT_CLIENT_NEW) continue; - crc_tmp = crc32c(0, &tt_common->vid, - sizeof(tt_common->vid)); + /* use network order to read the VID: this ensures that + * every node reads the bytes in the same order. + */ + tmp_vid = htons(tt_common->vid); + crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes @@ -2152,7 +2222,7 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, if (!tt_req_node) goto unlock; - memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN); + ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; list_add(&tt_req_node->list, &bat_priv->tt.req_list); @@ -2232,8 +2302,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) continue; - memcpy(tt_change->addr, tt_common_entry->addr, - ETH_ALEN); + ether_addr_copy(tt_change->addr, tt_common_entry->addr); tt_change->flags = tt_common_entry->flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, @@ -2262,6 +2331,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; + uint32_t crc; int i; /* check if each received CRC matches the locally stored one */ @@ -2281,7 +2351,10 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, if (!vlan) return false; - if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc)) + crc = vlan->tt.crc; + batadv_orig_node_vlan_free_ref(vlan); + + if (crc != ntohl(tt_vlan_tmp->crc)) return false; } @@ -2712,7 +2785,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, return; } } - orig_node->tt_initialised = true; + orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT; } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, @@ -2920,7 +2993,7 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, tt_roam_node->first_time = jiffies; atomic_set(&tt_roam_node->counter, BATADV_ROAMING_MAX_COUNT - 1); - memcpy(tt_roam_node->addr, client, ETH_ALEN); + ether_addr_copy(tt_roam_node->addr, client); list_add(&tt_roam_node->list, &bat_priv->tt.roam_list); ret = true; @@ -3109,6 +3182,9 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) */ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { + /* Update multicast addresses in local translation table */ + batadv_mcast_mla_update(bat_priv); + if (atomic_read(&bat_priv->tt.local_changes) < 1) { if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); @@ -3199,13 +3275,15 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); struct batadv_tvlv_tt_vlan_data *tt_vlan; bool full_table = true; + bool has_tt_init; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff; + has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT; + /* orig table not initialised AND first diff is in the OGM OR the ttvn * increased by one -> we can apply the attached changes */ - if ((!orig_node->tt_initialised && ttvn == 1) || - ttvn - orig_ttvn == 1) { + if ((!has_tt_init && ttvn == 1) || ttvn - orig_ttvn == 1) { /* the OGM could not contain the changes due to their size or * because they have already been sent BATADV_TT_OGM_APPEND_MAX * times. @@ -3218,7 +3296,6 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->tt_lock); - tt_change = (struct batadv_tvlv_tt_change *)tt_buff; batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, tt_change); @@ -3246,7 +3323,7 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, /* if we missed more than one change or our tables are not * in sync anymore -> request fresh tt data */ - if (!orig_node->tt_initialised || ttvn != orig_ttvn || + if (!has_tt_init || ttvn != orig_ttvn || !batadv_tt_global_check_crc(orig_node, tt_vlan, tt_num_vlan)) { request_table: diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 20a1d7861de..ad84d7b89e3 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -29,6 +29,8 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, int32_t match_vid, const char *message); +int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid); struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, const uint8_t *src, const uint8_t *addr, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 78370ab31f9..34891a56773 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -24,8 +24,9 @@ #ifdef CONFIG_BATMAN_ADV_DAT -/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed, - * BATADV_DAT_ADDR_MAX is changed as well. +/** + * batadv_dat_addr_t - it is the type used for all DHT addresses. If it is + * changed, BATADV_DAT_ADDR_MAX is changed as well. * * *Please be careful: batadv_dat_addr_t must be UNSIGNED* */ @@ -163,7 +164,7 @@ struct batadv_vlan_tt { }; /** - * batadv_orig_node_vlan - VLAN specific data per orig_node + * struct batadv_orig_node_vlan - VLAN specific data per orig_node * @vid: the VLAN identifier * @tt: VLAN specific TT attributes * @list: list node for orig_node::vlan_list @@ -204,14 +205,18 @@ struct batadv_orig_bat_iv { * @batadv_dat_addr_t: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @mcast_flags: multicast flags announced by the orig node + * @mcast_want_all_unsnoop_node: a list node for the + * mcast.want_all_unsnoopables list + * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list + * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list * @capabilities: announced capabilities of this originator + * @capa_initialized: bitfield to remember whether a capability was initialized * @last_ttvn: last seen translation table version number * @tt_buff: last tt changeset this node received from the orig node * @tt_buff_len: length of the last tt changeset this node received from the * orig node * @tt_buff_lock: lock that protects tt_buff and tt_buff_len - * @tt_initialised: bool keeping track of whether or not this node have received - * any translation table information from the orig node yet * @tt_lock: prevents from updating the table while reading it. Table update is * made up by two operations (data structure update and metdata -CRC/TTVN- * recalculation) and they have to be executed atomically in order to avoid @@ -247,12 +252,18 @@ struct batadv_orig_node { #endif unsigned long last_seen; unsigned long bcast_seqno_reset; +#ifdef CONFIG_BATMAN_ADV_MCAST + uint8_t mcast_flags; + struct hlist_node mcast_want_all_unsnoopables_node; + struct hlist_node mcast_want_all_ipv4_node; + struct hlist_node mcast_want_all_ipv6_node; +#endif uint8_t capabilities; + uint8_t capa_initialized; atomic_t last_ttvn; unsigned char *tt_buff; int16_t tt_buff_len; spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ - bool tt_initialised; /* prevents from changing the table while reading it */ spinlock_t tt_lock; DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); @@ -282,10 +293,15 @@ struct batadv_orig_node { * enum batadv_orig_capabilities - orig node capabilities * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled + * @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability + * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability + * (= orig node announces a tvlv of type BATADV_TVLV_MCAST) */ enum batadv_orig_capabilities { BATADV_ORIG_CAPA_HAS_DAT = BIT(0), BATADV_ORIG_CAPA_HAS_NC = BIT(1), + BATADV_ORIG_CAPA_HAS_TT = BIT(2), + BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), }; /** @@ -334,7 +350,7 @@ struct batadv_neigh_node { }; /** - * struct batadv_neigh_node_bat_iv - neighbor information per outgoing + * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing * interface for BATMAN IV * @tq_recv: ring buffer of received TQ values from this neigh node * @tq_index: ring buffer index @@ -544,7 +560,7 @@ struct batadv_priv_bla { #endif /** - * struct batadv_debug_log - debug logging data + * struct batadv_priv_debug_log - debug logging data * @log_buff: buffer holding the logs (ring bufer) * @log_start: index of next character to read * @log_end: index of next character to write @@ -607,6 +623,39 @@ struct batadv_priv_dat { }; #endif +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * struct batadv_priv_mcast - per mesh interface mcast data + * @mla_list: list of multicast addresses we are currently announcing via TT + * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable + * multicast traffic + * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic + * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic + * @flags: the flags we have last sent in our mcast tvlv + * @enabled: whether the multicast tvlv is currently enabled + * @num_disabled: number of nodes that have no mcast tvlv + * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic + * @num_want_all_ipv4: counter for items in want_all_ipv4_list + * @num_want_all_ipv6: counter for items in want_all_ipv6_list + * @want_lists_lock: lock for protecting modifications to mcast want lists + * (traversals are rcu-locked) + */ +struct batadv_priv_mcast { + struct hlist_head mla_list; + struct hlist_head want_all_unsnoopables_list; + struct hlist_head want_all_ipv4_list; + struct hlist_head want_all_ipv6_list; + uint8_t flags; + bool enabled; + atomic_t num_disabled; + atomic_t num_want_all_unsnoopables; + atomic_t num_want_all_ipv4; + atomic_t num_want_all_ipv6; + /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ + spinlock_t want_lists_lock; +}; +#endif + /** * struct batadv_priv_nc - per mesh interface network coding private data * @work: work queue callback item for cleanup @@ -672,6 +721,8 @@ struct batadv_softif_vlan { * enabled * @distributed_arp_table: bool indicating whether distributed ARP table is * enabled + * @multicast_mode: Enable or disable multicast optimizations on this node's + * sender/originating side * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) * @gw_sel_class: gateway selection class (applies if gw_mode client) * @orig_interval: OGM broadcast interval in milliseconds @@ -702,6 +753,7 @@ struct batadv_softif_vlan { * @tt: translation table data * @tvlv: type-version-length-value data * @dat: distributed arp table data + * @mcast: multicast data * @network_coding: bool indicating whether network coding is enabled * @batadv_priv_nc: network coding data */ @@ -721,6 +773,9 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DAT atomic_t distributed_arp_table; #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + atomic_t multicast_mode; +#endif atomic_t gw_mode; atomic_t gw_sel_class; atomic_t orig_interval; @@ -759,6 +814,9 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat dat; #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + struct batadv_priv_mcast mcast; +#endif #ifdef CONFIG_BATMAN_ADV_NC atomic_t network_coding; struct batadv_priv_nc nc; @@ -881,12 +939,14 @@ struct batadv_tt_local_entry { * struct batadv_tt_global_entry - translation table global entry data * @common: general translation table data * @orig_list: list of orig nodes announcing this non-mesh client + * @orig_list_count: number of items in the orig_list * @list_lock: lock protecting orig_list * @roam_at: time at which TT_GLOBAL_ROAM was set */ struct batadv_tt_global_entry { struct batadv_tt_common_entry common; struct hlist_head orig_list; + atomic_t orig_list_count; spinlock_t list_lock; /* protects orig_list */ unsigned long roam_at; }; @@ -1004,8 +1064,8 @@ struct batadv_nc_packet { }; /** - * batadv_skb_cb - control buffer structure used to store private data relevant - * to batman-adv in the skb->cb buffer in skbs. + * struct batadv_skb_cb - control buffer structure used to store private data + * relevant to batman-adv in the skb->cb buffer in skbs. * @decoded: Marks a skb as decoded, which is checked when searching for coding * opportunities in network-coding.c */ @@ -1116,6 +1176,16 @@ struct batadv_dat_entry { }; /** + * struct batadv_hw_addr - a list entry for a MAC address + * @list: list node for the linking of entries + * @addr: the MAC address of this list entry + */ +struct batadv_hw_addr { + struct hlist_node list; + unsigned char addr[ETH_ALEN]; +}; + +/** * struct batadv_dat_candidate - candidate destination for DAT operations * @type: the type of the selected candidate. It can one of the following: * - BATADV_DAT_CANDIDATE_NOT_FOUND diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index adb3ea04ada..73492b91105 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -27,7 +27,7 @@ #include "6lowpan.h" -#include "../ieee802154/6lowpan.h" /* for the compression support */ +#include <net/6lowpan.h> /* for the compression support */ #define IFACE_NAME_TEMPLATE "bt%d" #define EUI64_ADDR_LEN 8 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1bb8900086d..8671bc79a35 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -887,14 +887,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; - /* encrypt must be pending if auth is also pending */ - set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); - cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + + /* If we're already encrypted set the REAUTH_PEND flag, + * otherwise set the ENCRYPT_PEND. + */ if (conn->key_type != 0xff) set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); + else + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 492d8d5071c..3454807a40c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3449,6 +3449,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!conn) goto unlock; + /* For BR/EDR the necessary steps are taken through the + * auth_complete event. + */ + if (conn->type != LE_LINK) + goto unlock; + if (!ev->status) conn->sec_level = conn->pending_sec_level; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 292e619db89..8181ea4bc2f 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -223,51 +223,6 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_sync(dev); } -static int hidp_send_report(struct hidp_session *session, struct hid_report *report) -{ - unsigned char hdr; - u8 *buf; - int rsize, ret; - - buf = hid_alloc_report_buf(report, GFP_ATOMIC); - if (!buf) - return -EIO; - - hid_output_report(report, buf); - hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; - - rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); - ret = hidp_send_intr_message(session, hdr, buf, rsize); - - kfree(buf); - return ret; -} - -static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, - unsigned int code, int value) -{ - struct hid_device *hid = input_get_drvdata(dev); - struct hidp_session *session = hid->driver_data; - struct hid_field *field; - int offset; - - BT_DBG("session %p type %d code %d value %d", - session, type, code, value); - - if (type != EV_LED) - return -1; - - offset = hidinput_find_field(hid, type, code, &field); - if (offset == -1) { - hid_warn(dev, "event field not found\n"); - return -1; - } - - hid_set_field(field, offset, value); - - return hidp_send_report(session, field->report); -} - static int hidp_get_raw_report(struct hid_device *hid, unsigned char report_number, unsigned char *data, size_t count, @@ -353,17 +308,24 @@ err: return ret; } -static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, - unsigned char report_type) +static int hidp_set_raw_report(struct hid_device *hid, unsigned char reportnum, + unsigned char *data, size_t count, + unsigned char report_type) { struct hidp_session *session = hid->driver_data; int ret; - if (report_type == HID_OUTPUT_REPORT) { - report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; - return hidp_send_intr_message(session, report_type, - data, count); - } else if (report_type != HID_FEATURE_REPORT) { + switch (report_type) { + case HID_FEATURE_REPORT: + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; + break; + case HID_INPUT_REPORT: + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_INPUT; + break; + case HID_OUTPUT_REPORT: + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT; + break; + default: return -EINVAL; } @@ -371,8 +333,8 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s return -ERESTARTSYS; /* Set up our wait, and send the report request to the device. */ + data[0] = reportnum; set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); - report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; ret = hidp_send_ctrl_message(session, report_type, data, count); if (ret) goto err; @@ -411,6 +373,29 @@ err: return ret; } +static int hidp_output_report(struct hid_device *hid, __u8 *data, size_t count) +{ + struct hidp_session *session = hid->driver_data; + + return hidp_send_intr_message(session, + HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT, + data, count); +} + +static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum, + __u8 *buf, size_t len, unsigned char rtype, + int reqtype) +{ + switch (reqtype) { + case HID_REQ_GET_REPORT: + return hidp_get_raw_report(hid, reportnum, buf, len, rtype); + case HID_REQ_SET_REPORT: + return hidp_set_raw_report(hid, reportnum, buf, len, rtype); + default: + return -EIO; + } +} + static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; @@ -430,6 +415,16 @@ static void hidp_del_timer(struct hidp_session *session) del_timer(&session->timer); } +static void hidp_process_report(struct hidp_session *session, + int type, const u8 *data, int len, int intr) +{ + if (len > HID_MAX_BUFFER_SIZE) + len = HID_MAX_BUFFER_SIZE; + + memcpy(session->input_buf, data, len); + hid_input_report(session->hid, type, session->input_buf, len, intr); +} + static void hidp_process_handshake(struct hidp_session *session, unsigned char param) { @@ -502,7 +497,8 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb, hidp_input_report(session, skb); if (session->hid) - hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0); + hidp_process_report(session, HID_INPUT_REPORT, + skb->data, skb->len, 0); break; case HIDP_DATA_RTYPE_OTHER: @@ -584,7 +580,8 @@ static void hidp_recv_intr_frame(struct hidp_session *session, hidp_input_report(session, skb); if (session->hid) { - hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 1); + hidp_process_report(session, HID_INPUT_REPORT, + skb->data, skb->len, 1); BT_DBG("report len %d", skb->len); } } else { @@ -727,7 +724,8 @@ static struct hid_ll_driver hidp_hid_driver = { .stop = hidp_stop, .open = hidp_open, .close = hidp_close, - .hidinput_input_event = hidp_hidinput_event, + .raw_request = hidp_raw_request, + .output_report = hidp_output_report, }; /* This function sets up the hid device. It does not add it @@ -769,15 +767,15 @@ static int hidp_setup_hid(struct hidp_session *session, snprintf(hid->phys, sizeof(hid->phys), "%pMR", &l2cap_pi(session->ctrl_sock->sk)->chan->src); + /* NOTE: Some device modules depend on the dst address being stored in + * uniq. Please be aware of this before making changes to this behavior. + */ snprintf(hid->uniq, sizeof(hid->uniq), "%pMR", &l2cap_pi(session->ctrl_sock->sk)->chan->dst); hid->dev.parent = &session->conn->hcon->dev; hid->ll_driver = &hidp_hid_driver; - hid->hid_get_raw_report = hidp_get_raw_report; - hid->hid_output_raw_report = hidp_output_raw_report; - /* True if device is blacklisted in drivers/hid/hid-core.c */ if (hid_ignore(hid)) { hid_destroy_device(session->hid); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index ab5241400cf..8798492a6e9 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -24,6 +24,7 @@ #define __HIDP_H #include <linux/types.h> +#include <linux/hid.h> #include <linux/kref.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/l2cap.h> @@ -179,6 +180,9 @@ struct hidp_session { /* Used in hidp_output_raw_report() */ int output_report_success; /* boolean */ + + /* temporary input buffer */ + u8 input_buf[HID_MAX_BUFFER_SIZE]; }; /* HIDP init defines */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a1e5bb7d06e..dc4d301d3a7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7519,9 +7519,9 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); - debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs, &le_max_credits); - debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs, &le_default_mps); bt_6lowpan_init(); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f59e00c2daa..ef5e5b04f34 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1271,7 +1271,7 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) if (parent) { bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); } else { sk->sk_state_change(sk); } @@ -1327,7 +1327,7 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) sk->sk_state_change(sk); if (parent) - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); release_sock(sk); } @@ -1340,7 +1340,7 @@ static void l2cap_sock_defer_cb(struct l2cap_chan *chan) parent = bt_sk(sk)->parent; if (parent) - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); release_sock(sk); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 05c609b12a1..754b6fe4f74 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -186,9 +186,9 @@ static void rfcomm_l2state_change(struct sock *sk) rfcomm_schedule(); } -static void rfcomm_l2data_ready(struct sock *sk, int bytes) +static void rfcomm_l2data_ready(struct sock *sk) { - BT_DBG("%p bytes %d", sk, bytes); + BT_DBG("%p", sk); rfcomm_schedule(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index eabd25ab5ad..c603a5eb472 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -54,7 +54,7 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb) atomic_add(skb->len, &sk->sk_rmem_alloc); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) rfcomm_dlc_throttle(d); @@ -84,7 +84,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) sock_set_flag(sk, SOCK_ZAPPED); bt_accept_unlink(sk); } - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); } else { if (d->state == BT_CONNECTED) rfcomm_session_getaddr(d->session, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ab1e6fcca4c..c06dbd3938e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1024,7 +1024,7 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state = BT_CONNECTED; /* Wake up parent */ - parent->sk_data_ready(parent, 1); + parent->sk_data_ready(parent); bh_unlock_sock(parent); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e4401a531af..3e2da2cb72d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,14 +49,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) - goto out; - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { @@ -88,18 +88,11 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - int i; - br->stats = alloc_percpu(struct pcpu_sw_netstats); + br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *br_dev_stats; - br_dev_stats = per_cpu_ptr(br->stats, i); - u64_stats_init(&br_dev_stats->syncp); - } - return 0; } @@ -143,9 +136,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, const struct pcpu_sw_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { - start = u64_stats_fetch_begin_bh(&bstats->syncp); + start = u64_stats_fetch_begin_irq(&bstats->syncp); memcpy(&tmp, bstats, sizeof(tmp)); - } while (u64_stats_fetch_retry_bh(&bstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&bstats->syncp, start)); sum.tx_bytes += tmp.tx_bytes; sum.tx_packets += tmp.tx_packets; sum.rx_bytes += tmp.rx_bytes; @@ -187,8 +180,7 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); - br_fdb_change_mac_address(br, addr->sa_data); + /* Mac address will be changed in br_stp_change_bridge_id(). */ br_stp_change_bridge_id(br, addr->sa_data); } spin_unlock_bh(&br->lock); @@ -226,8 +218,34 @@ static void br_netpoll_cleanup(struct net_device *dev) br_netpoll_disable(p); } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, - gfp_t gfp) +static int __br_netpoll_enable(struct net_bridge_port *p) +{ + struct netpoll *np; + int err; + + np = kzalloc(sizeof(*p->np), GFP_KERNEL); + if (!np) + return -ENOMEM; + + err = __netpoll_setup(np, p->dev); + if (err) { + kfree(np); + return err; + } + + p->np = np; + return err; +} + +int br_netpoll_enable(struct net_bridge_port *p) +{ + if (!p->br->dev->npinfo) + return 0; + + return __br_netpoll_enable(p); +} + +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; @@ -236,7 +254,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; - err = br_netpoll_enable(p, gfp); + err = __br_netpoll_enable(p); if (err) goto fail; } @@ -249,28 +267,6 @@ fail: goto out; } -int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) -{ - struct netpoll *np; - int err; - - if (!p->br->dev->npinfo) - return 0; - - np = kzalloc(sizeof(*p->np), gfp); - if (!np) - return -ENOMEM; - - err = __netpoll_setup(np, p->dev, gfp); - if (err) { - kfree(np); - return err; - } - - p->np = np; - return err; -} - void br_netpoll_disable(struct net_bridge_port *p) { struct netpoll *np = p->np; @@ -370,7 +366,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00; - memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN); + ether_addr_copy(br->group_addr, eth_reserved_addr_base); br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c5f5a4a933f..9203d5a1943 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -27,6 +27,9 @@ #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; +static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, + const unsigned char *addr, + __u16 vid); static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, @@ -89,11 +92,57 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) call_rcu(&f->rcu, fdb_rcu_free); } +/* Delete a local entry if no other port had the same address. */ +static void fdb_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_fdb_entry *f) +{ + const unsigned char *addr = f->addr.addr; + u16 vid = f->vlan_id; + struct net_bridge_port *op; + + /* Maybe another port has same hw addr? */ + list_for_each_entry(op, &br->port_list, list) { + if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && + (!vid || nbp_vlan_find(op, vid))) { + f->dst = op; + f->added_by_user = 0; + return; + } + } + + /* Maybe bridge device has same hw addr? */ + if (p && ether_addr_equal(br->dev->dev_addr, addr) && + (!vid || br_vlan_find(br, vid))) { + f->dst = NULL; + f->added_by_user = 0; + return; + } + + fdb_delete(br, f); +} + +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *f; + + spin_lock_bh(&br->hash_lock); + f = fdb_find(head, addr, vid); + if (f && f->is_local && !f->added_by_user && f->dst == p) + fdb_delete_local(br, p, f); + spin_unlock_bh(&br->hash_lock); +} + void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; - bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false; + struct net_port_vlans *pv = nbp_get_vlan_info(p); + bool no_vlan = !pv; int i; + u16 vid; spin_lock_bh(&br->hash_lock); @@ -104,38 +153,34 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) struct net_bridge_fdb_entry *f; f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); - if (f->dst == p && f->is_local) { - /* maybe another port has same hw addr? */ - struct net_bridge_port *op; - u16 vid = f->vlan_id; - list_for_each_entry(op, &br->port_list, list) { - if (op != p && - ether_addr_equal(op->dev->dev_addr, - f->addr.addr) && - nbp_vlan_find(op, vid)) { - f->dst = op; - goto insert; - } - } - + if (f->dst == p && f->is_local && !f->added_by_user) { /* delete old one */ - fdb_delete(br, f); -insert: - /* insert new address, may fail if invalid - * address or dup. - */ - fdb_insert(br, p, newaddr, vid); + fdb_delete_local(br, p, f); /* if this port has no vlan information * configured, we can safely be done at * this point. */ if (no_vlan) - goto done; + goto insert; } } } +insert: + /* insert new address, may fail if invalid address or dup. */ + fdb_insert(br, p, newaddr, 0); + + if (no_vlan) + goto done; + + /* Now add entries for every VLAN configured on the port. + * This function runs under RTNL so the bitmap will not change + * from under us. + */ + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + fdb_insert(br, p, newaddr, vid); + done: spin_unlock_bh(&br->hash_lock); } @@ -146,10 +191,12 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) struct net_port_vlans *pv; u16 vid = 0; + spin_lock_bh(&br->hash_lock); + /* If old entry was unassociated with any port, then delete it. */ f = __br_fdb_get(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst) - fdb_delete(br, f); + fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, 0); @@ -159,14 +206,16 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) */ pv = br_get_vlan_info(br); if (!pv) - return; + goto out; for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) - fdb_delete(br, f); + fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, vid); } +out: + spin_unlock_bh(&br->hash_lock); } void br_fdb_cleanup(unsigned long _data) @@ -235,25 +284,11 @@ void br_fdb_delete_by_port(struct net_bridge *br, if (f->is_static && !do_all) continue; - /* - * if multiple ports all have the same device address - * then when one port is deleted, assign - * the local entry to other port - */ - if (f->is_local) { - struct net_bridge_port *op; - list_for_each_entry(op, &br->port_list, list) { - if (op != p && - ether_addr_equal(op->dev->dev_addr, - f->addr.addr)) { - f->dst = op; - goto skip_delete; - } - } - } - fdb_delete(br, f); - skip_delete: ; + if (f->is_local) + fdb_delete_local(br, p, f); + else + fdb_delete(br, f); } } spin_unlock_bh(&br->hash_lock); @@ -397,6 +432,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->vlan_id = vid; fdb->is_local = 0; fdb->is_static = 0; + fdb->added_by_user = 0; fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); } @@ -447,7 +483,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, bool added_by_user) { struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; @@ -473,13 +509,18 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* fastpath: update of existing entry */ fdb->dst = source; fdb->updated = jiffies; + if (unlikely(added_by_user)) + fdb->added_by_user = 1; } } else { spin_lock(&br->hash_lock); if (likely(!fdb_find(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid); - if (fdb) + if (fdb) { + if (unlikely(added_by_user)) + fdb->added_by_user = 1; fdb_notify(br, fdb, RTM_NEWNEIGH); + } } /* else we lose race and someone else inserts * it first, don't bother updating @@ -647,6 +688,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, modified = true; } + fdb->added_by_user = 1; fdb->used = jiffies; if (modified) { @@ -664,7 +706,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, if (ndm->ndm_flags & NTF_USE) { rcu_read_lock(); - br_fdb_update(p->br, p, addr, vid); + br_fdb_update(p->br, p, addr, vid, true); rcu_read_unlock(); } else { spin_lock_bh(&p->br->hash_lock); @@ -749,8 +791,7 @@ out: return err; } -int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, - u16 vlan) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan) { struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d3409e6b545..056b67b0e27 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,16 +35,11 @@ static inline int should_deliver(const struct net_bridge_port *p, p->state == BR_STATE_FORWARDING; } -static inline unsigned int packet_length(const struct sk_buff *skb) -{ - return skb->len - (skb->protocol == htons(ETH_P_8021Q) ? VLAN_HLEN : 0); -} - int br_dev_queue_push_xmit(struct sk_buff *skb) { /* ip_fragment doesn't copy the MAC header */ if (nf_bridge_maybe_copy_header(skb) || - (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))) { + !is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); } else { skb_push(skb, ETH_HLEN); @@ -71,7 +66,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; if (unlikely(netpoll_tx_running(to->br->dev))) { - if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index cffe1d666ba..5262b8617eb 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -366,7 +366,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - err = br_netpoll_enable(p, GFP_KERNEL); + err = br_netpoll_enable(p); if (err) goto err3; @@ -389,6 +389,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br->dev->needed_headroom < dev->needed_headroom) br->dev->needed_headroom = dev->needed_headroom; + if (br_fdb_insert(br, p, dev->dev_addr, 0)) + netdev_err(dev, "failed insert local address bridge forwarding table\n"); + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -404,9 +407,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); - if (br_fdb_insert(br, p, dev->dev_addr, 0)) - netdev_err(dev, "failed insert local address bridge forwarding table\n"); - kobject_uevent(&p->kobj, KOBJ_ADD); return 0; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index bf8dc7d308d..7985deaff52 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; @@ -39,18 +40,18 @@ static int br_pass_frame_up(struct sk_buff *skb) * packet is allowed except in promisc modue when someone * may be running packet capture. */ + pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + !br_allowed_egress(br, pv, skb)) { kfree_skb(skb); return NET_RX_DROP; } - skb = br_handle_vlan(br, br_get_vlan_info(br), skb); - if (!skb) - return NET_RX_DROP; - indev = skb->dev; skb->dev = brdev; + skb = br_handle_vlan(br, pv, skb); + if (!skb) + return NET_RX_DROP; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); @@ -72,12 +73,12 @@ int br_handle_frame_finish(struct sk_buff *skb) goto drop; if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) - goto drop; + goto out; /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb, vid)) @@ -148,7 +149,7 @@ static int br_handle_local_finish(struct sk_buff *skb) br_vlan_get_tag(skb, &vid); if (p->flags & BR_LEARNING) - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); return 0; /* process further */ } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ef66365b735..7b757b5dc77 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -363,7 +363,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); + ether_addr_copy(eth->h_source, br->dev->dev_addr); eth->h_dest[0] = 1; eth->h_dest[1] = 0; eth->h_dest[2] = 0x5e; @@ -433,7 +433,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); + ether_addr_copy(eth->h_source, br->dev->dev_addr); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -1127,9 +1127,10 @@ static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, struct bridge_mcast_querier *querier, int saddr, + bool is_general_query, unsigned long max_delay) { - if (saddr) + if (saddr && is_general_query) br_multicast_update_querier_timer(br, querier, max_delay); else if (timer_pending(&querier->timer)) return; @@ -1181,8 +1182,16 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - max_delay); + !group, max_delay); if (!group) goto out; @@ -1228,6 +1237,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; + bool is_general_query; int err = 0; spin_lock(&br->multicast_lock); @@ -1235,6 +1245,12 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1256,8 +1272,19 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } + is_general_query = group && ipv6_addr_any(group); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), max_delay); + !ipv6_addr_any(&ip6h->saddr), + is_general_query, max_delay); if (!group) goto out; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index b008c59a92c..80e1b0f60a3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -167,7 +167,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->dst.dev = br->dev; rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE; + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } @@ -506,7 +506,7 @@ bridged_dnat: 1); return 0; } - memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); skb->pkt_type = PACKET_HOST; } } else { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fcd12333c59..06811d79f89 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -46,12 +46,12 @@ typedef __u16 port_id; struct bridge_id { unsigned char prio[2]; - unsigned char addr[6]; + unsigned char addr[ETH_ALEN]; }; struct mac_addr { - unsigned char addr[6]; + unsigned char addr[ETH_ALEN]; }; struct br_ip @@ -104,6 +104,7 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local; unsigned char is_static; + unsigned char added_by_user; __u16 vlan_id; }; @@ -348,7 +349,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); +int br_netpoll_enable(struct net_bridge_port *p); void br_netpoll_disable(struct net_bridge_port *p); #else static inline void br_netpoll_send_skb(const struct net_bridge_port *p, @@ -356,7 +357,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) +static inline int br_netpoll_enable(struct net_bridge_port *p) { return 0; } @@ -370,6 +371,9 @@ static inline void br_netpoll_disable(struct net_bridge_port *p) int br_fdb_init(void); void br_fdb_fini(void); void br_fdb_flush(struct net_bridge *br); +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); void br_fdb_cleanup(unsigned long arg); @@ -383,8 +387,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, u16 vid); -int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid); + const unsigned char *addr, u16 vid, bool added_by_user); int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); @@ -584,6 +587,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); +bool br_vlan_find(struct net_bridge *br, u16 vid); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); @@ -665,6 +669,11 @@ static inline void br_vlan_flush(struct net_bridge *br) { } +static inline bool br_vlan_find(struct net_bridge *br, u16 vid) +{ + return false; +} + static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 656a6f3e40d..189ba1e7d85 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -194,6 +194,8 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) wasroot = br_is_root_bridge(br); + br_fdb_change_mac_address(br, addr); + memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); memcpy(br->bridge_id.addr, addr, ETH_ALEN); memcpy(br->dev->dev_addr, addr, ETH_ALEN); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4ca4d0a0151..4a371610278 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -99,9 +99,9 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { if (v->port_idx) - rcu_assign_pointer(v->parent.port->vlan_info, NULL); + RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); else - rcu_assign_pointer(v->parent.br->vlan_info, NULL); + RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); kfree_rcu(v, rcu); } return 0; @@ -113,28 +113,12 @@ static void __vlan_flush(struct net_port_vlans *v) v->pvid = 0; bitmap_zero(v->vlan_bitmap, VLAN_N_VID); if (v->port_idx) - rcu_assign_pointer(v->parent.port->vlan_info, NULL); + RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); else - rcu_assign_pointer(v->parent.br->vlan_info, NULL); + RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); kfree_rcu(v, rcu); } -/* Strip the tag from the packet. Will return skb with tci set 0. */ -static struct sk_buff *br_vlan_untag(struct sk_buff *skb) -{ - if (skb->protocol != htons(ETH_P_8021Q)) { - skb->vlan_tci = 0; - return skb; - } - - skb->vlan_tci = 0; - skb = vlan_untag(skb); - if (skb) - skb->vlan_tci = 0; - - return skb; -} - struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *pv, struct sk_buff *skb) @@ -144,13 +128,27 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (!br->vlan_enabled) goto out; + /* Vlan filter table must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!pv) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send tagged. */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) - skb = br_vlan_untag(skb); + skb->vlan_tci = 0; out: return skb; @@ -172,7 +170,19 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * rejected. */ if (!v) - return false; + goto drop; + + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!vlan_tx_tag_present(skb) && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)))) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + return false; + } err = br_vlan_get_tag(skb, vid); if (!*vid) { @@ -183,7 +193,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * vlan untagged or priority-tagged traffic belongs to. */ if (pvid == VLAN_N_VID) - return false; + goto drop; /* PVID is set on this port. Any untagged or priority-tagged * ingress frame is considered to belong to this vlan. @@ -206,7 +216,8 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Frame had a valid vlan tag. See if vlan is allowed */ if (test_bit(*vid, v->vlan_bitmap)) return true; - +drop: + kfree_skb(skb); return false; } @@ -275,9 +286,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) if (!pv) return -EINVAL; - spin_lock_bh(&br->hash_lock); - fdb_delete_by_addr(br, br->dev->dev_addr, vid); - spin_unlock_bh(&br->hash_lock); + br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); __vlan_del(pv, vid); return 0; @@ -295,6 +304,25 @@ void br_vlan_flush(struct net_bridge *br) __vlan_flush(pv); } +bool br_vlan_find(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(br->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} + int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { if (!rtnl_trylock()) @@ -359,9 +387,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) if (!pv) return -EINVAL; - spin_lock_bh(&port->br->hash_lock); - fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); - spin_unlock_bh(&port->br->hash_lock); + br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); return __vlan_del(pv, vid); } diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 3fb3c848aff..9024283d2bc 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, uint32_t cmp[2] = { 0, 0 }; int key = ((const unsigned char *)mac)[5]; - memcpy(((char *) cmp) + 2, mac, ETH_ALEN); + ether_addr_copy(((char *) cmp) + 2, mac); start = wh->table[key]; limit = wh->table[key + 1]; if (ip) { diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index c59f7bfae6e..4e0b0c35932 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); return info->target; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 46624bb6d9b..203964997a5 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -25,10 +25,10 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (par->hooknum != NF_BR_BROUTING) /* rcu_read_lock()ed by nf_hook_slow */ - memcpy(eth_hdr(skb)->h_dest, - br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, + br_port_get_rcu(par->in)->br->dev->dev_addr); else - memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 0f6b118d6cb..e56ccd060d2 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -24,7 +24,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_source, info->mac); if (!(info->target & NAT_ARP_BIT) && eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { const struct arphdr *ap; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 0e474b13463..1059ed3bc25 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, if (repl->num_counters && copy_to_user(repl->counters, counterstmp, repl->num_counters * sizeof(struct ebt_counter))) { - ret = -EFAULT; + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); } - else - ret = 0; /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 4dca159435c..edbca468fa7 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -22,6 +22,7 @@ #include <net/pkt_sched.h> #include <net/caif/caif_device.h> #include <net/caif/caif_layer.h> +#include <net/caif/caif_dev.h> #include <net/caif/cfpkt.h> #include <net/caif/cfcnfg.h> #include <net/caif/cfserl.h> diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index d6be3edb7a4..e8437094d15 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -124,7 +124,6 @@ static void caif_flow_ctrl(struct sock *sk, int mode) static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; - int skb_len; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -153,14 +152,13 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * may be freed by other threads of control pulling packets * from the queue. */ - skb_len = skb->len; spin_lock_irqsave(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb_len); + sk->sk_data_ready(sk); else kfree_skb(skb); return 0; diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 353f793d1b3..a6e11546305 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -15,6 +15,7 @@ #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> +#include <net/caif/caif_dev.h> #define SRVL_CTRL_PKT_SIZE 1 #define SRVL_FLOW_OFF 0x81 diff --git a/net/can/af_can.c b/net/can/af_can.c index d249874a366..a27f8aad9e9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -57,6 +57,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/skb.h> #include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int loop) return -ENOMEM; } - newskb->sk = skb->sk; + can_skb_set_owner(newskb, skb->sk); newskb->ip_summed = CHECKSUM_UNNECESSARY; newskb->pkt_type = PACKET_BROADCAST; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 3fc737b214c..dcb75c0e66c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op) /* send with loopback */ skb->dev = dev; - skb->sk = op->sk; + can_skb_set_owner(skb, op->sk); can_send(skb, 1); /* update statistics */ @@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; - skb->sk = sk; + can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ dev_put(dev); diff --git a/net/can/raw.c b/net/can/raw.c index 07d72d85232..081e81fd017 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -121,13 +121,9 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!ro->recv_own_msgs && oskb->sk == sk) return; - /* do not pass frames with DLC > 8 to a legacy socket */ - if (!ro->fd_frames) { - struct canfd_frame *cfd = (struct canfd_frame *)oskb->data; - - if (unlikely(cfd->len > CAN_MAX_DLEN)) - return; - } + /* do not pass non-CAN2.0 frames to a legacy socket */ + if (!ro->fd_frames && oskb->len != CAN_MTU) + return; /* clone the given skb to be able to enqueue it into the rcv queue */ skb = skb_clone(oskb, GFP_ATOMIC); @@ -715,6 +711,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, skb->dev = dev; skb->sk = sk; + skb->priority = sk->sk_priority; err = can_send(skb, ro->loopback); @@ -737,9 +734,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - struct raw_sock *ro = raw_sk(sk); struct sk_buff *skb; - int rxmtu; int err = 0; int noblock; @@ -750,20 +745,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - /* - * when serving a legacy socket the DLC <= 8 is already checked inside - * raw_rcv(). Now check if we need to pass a canfd_frame to a legacy - * socket and cut the possible CANFD_MTU/CAN_MTU length to CAN_MTU - */ - if (!ro->fd_frames) - rxmtu = CAN_MTU; - else - rxmtu = skb->len; - - if (size < rxmtu) + if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else - size = rxmtu; + size = skb->len; err = memcpy_toiovec(msg->msg_iov, skb->data, size); if (err < 0) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index b703790b4e4..a1ef53c0441 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -292,10 +292,12 @@ static int is_out(const struct crush_map *map, * @outpos: our position in that vector * @tries: number of attempts to make * @recurse_tries: number of attempts to have recursive chooseleaf make - * @local_tries: localized retries - * @local_fallback_tries: localized fallback retries + * @local_retries: localized retries + * @local_fallback_retries: localized fallback retries * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) + * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) + * @parent_r: r value passed from the parent */ static int crush_choose_firstn(const struct crush_map *map, struct crush_bucket *bucket, @@ -304,10 +306,12 @@ static int crush_choose_firstn(const struct crush_map *map, int *out, int outpos, unsigned int tries, unsigned int recurse_tries, - unsigned int local_tries, - unsigned int local_fallback_tries, + unsigned int local_retries, + unsigned int local_fallback_retries, int recurse_to_leaf, - int *out2) + unsigned int vary_r, + int *out2, + int parent_r) { int rep; unsigned int ftotal, flocal; @@ -319,8 +323,11 @@ static int crush_choose_firstn(const struct crush_map *map, int itemtype; int collide, reject; - dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", - bucket->id, x, outpos, numrep); + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", + recurse_to_leaf ? "_LEAF" : "", + bucket->id, x, outpos, numrep, + tries, recurse_tries, local_retries, local_fallback_retries, + parent_r); for (rep = outpos; rep < numrep; rep++) { /* keep trying until we get a non-out, non-colliding item */ @@ -335,7 +342,7 @@ static int crush_choose_firstn(const struct crush_map *map, do { collide = 0; retry_bucket = 0; - r = rep; + r = rep + parent_r; /* r' = r + f_total */ r += ftotal; @@ -344,9 +351,9 @@ static int crush_choose_firstn(const struct crush_map *map, reject = 1; goto reject; } - if (local_fallback_tries > 0 && + if (local_fallback_retries > 0 && flocal >= (in->size>>1) && - flocal > local_fallback_tries) + flocal > local_fallback_retries) item = bucket_perm_choose(in, x, r); else item = crush_bucket_choose(in, x, r); @@ -387,16 +394,23 @@ static int crush_choose_firstn(const struct crush_map *map, reject = 0; if (!collide && recurse_to_leaf) { if (item < 0) { + int sub_r; + if (vary_r) + sub_r = r >> (vary_r-1); + else + sub_r = 0; if (crush_choose_firstn(map, map->buckets[-1-item], weight, weight_max, x, outpos+1, 0, out2, outpos, recurse_tries, 0, - local_tries, - local_fallback_tries, + local_retries, + local_fallback_retries, 0, - NULL) <= outpos) + vary_r, + NULL, + sub_r) <= outpos) /* didn't get leaf */ reject = 1; } else { @@ -420,14 +434,14 @@ reject: ftotal++; flocal++; - if (collide && flocal <= local_tries) + if (collide && flocal <= local_retries) /* retry locally a few times */ retry_bucket = 1; - else if (local_fallback_tries > 0 && - flocal <= in->size + local_fallback_tries) + else if (local_fallback_retries > 0 && + flocal <= in->size + local_fallback_retries) /* exhaustive bucket search */ retry_bucket = 1; - else if (ftotal <= tries) + else if (ftotal < tries) /* then retry descent */ retry_descent = 1; else @@ -640,10 +654,20 @@ int crush_do_rule(const struct crush_map *map, __u32 step; int i, j; int numrep; - int choose_tries = map->choose_total_tries; - int choose_local_tries = map->choose_local_tries; - int choose_local_fallback_tries = map->choose_local_fallback_tries; + /* + * the original choose_total_tries value was off by one (it + * counted "retries" and not "tries"). add one. + */ + int choose_tries = map->choose_total_tries + 1; int choose_leaf_tries = 0; + /* + * the local tries values were counted as "retries", though, + * and need no adjustment + */ + int choose_local_retries = map->choose_local_tries; + int choose_local_fallback_retries = map->choose_local_fallback_tries; + + int vary_r = map->chooseleaf_vary_r; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -676,13 +700,18 @@ int crush_do_rule(const struct crush_map *map, break; case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: - if (curstep->arg1 > 0) - choose_local_tries = curstep->arg1; + if (curstep->arg1 >= 0) + choose_local_retries = curstep->arg1; break; case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: - if (curstep->arg1 > 0) - choose_local_fallback_tries = curstep->arg1; + if (curstep->arg1 >= 0) + choose_local_fallback_retries = curstep->arg1; + break; + + case CRUSH_RULE_SET_CHOOSELEAF_VARY_R: + if (curstep->arg1 >= 0) + vary_r = curstep->arg1; break; case CRUSH_RULE_CHOOSELEAF_FIRSTN: @@ -734,10 +763,12 @@ int crush_do_rule(const struct crush_map *map, o+osize, j, choose_tries, recurse_tries, - choose_local_tries, - choose_local_fallback_tries, + choose_local_retries, + choose_local_fallback_retries, recurse_to_leaf, - c+osize); + vary_r, + c+osize, + 0); } else { crush_choose_indep( map, diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 258a382e75e..10421a4b76f 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -53,34 +53,55 @@ static int osdmap_show(struct seq_file *s, void *p) { int i; struct ceph_client *client = s->private; + struct ceph_osdmap *map = client->osdc.osdmap; struct rb_node *n; - if (client->osdc.osdmap == NULL) + if (map == NULL) return 0; - seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); + + seq_printf(s, "epoch %d\n", map->epoch); seq_printf(s, "flags%s%s\n", - (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? - " NEARFULL" : "", - (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? - " FULL" : ""); - for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { + (map->flags & CEPH_OSDMAP_NEARFULL) ? " NEARFULL" : "", + (map->flags & CEPH_OSDMAP_FULL) ? " FULL" : ""); + + for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %llu pg_num %d / %d\n", - (unsigned long long)pool->id, pool->pg_num, - pool->pg_num_mask); + + seq_printf(s, "pool %lld pg_num %u (%d) read_tier %lld write_tier %lld\n", + pool->id, pool->pg_num, pool->pg_num_mask, + pool->read_tier, pool->write_tier); } - for (i = 0; i < client->osdc.osdmap->max_osd; i++) { - struct ceph_entity_addr *addr = - &client->osdc.osdmap->osd_addr[i]; - int state = client->osdc.osdmap->osd_state[i]; + for (i = 0; i < map->max_osd; i++) { + struct ceph_entity_addr *addr = &map->osd_addr[i]; + int state = map->osd_state[i]; char sb[64]; - seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", + seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n", i, ceph_pr_addr(&addr->in_addr), - ((client->osdc.osdmap->osd_weight[i]*100) >> 16), - ceph_osdmap_state_str(sb, sizeof(sb), state)); + ((map->osd_weight[i]*100) >> 16), + ceph_osdmap_state_str(sb, sizeof(sb), state), + ((ceph_get_primary_affinity(map, i)*100) >> 16)); + } + for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) { + struct ceph_pg_mapping *pg = + rb_entry(n, struct ceph_pg_mapping, node); + + seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool, + pg->pgid.seed); + for (i = 0; i < pg->pg_temp.len; i++) + seq_printf(s, "%s%d", (i == 0 ? "" : ","), + pg->pg_temp.osds[i]); + seq_printf(s, "]\n"); } + for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) { + struct ceph_pg_mapping *pg = + rb_entry(n, struct ceph_pg_mapping, node); + + seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool, + pg->pgid.seed, pg->primary_temp.osd); + } + return 0; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 30efc5c1862..dac7f9b9868 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -383,7 +383,7 @@ static void con_sock_state_closed(struct ceph_connection *con) */ /* data available on socket, or listen socket received a connect */ -static void ceph_sock_data_ready(struct sock *sk, int count_unused) +static void ceph_sock_data_ready(struct sock *sk) { struct ceph_connection *con = sk->sk_user_data; if (atomic_read(&con->msgr->stopping)) { @@ -919,6 +919,9 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, if (!bytes || cursor->page_offset) return false; /* more bytes to process in the current page */ + if (!cursor->resid) + return false; /* no more data */ + /* Move on to the next page; offset is already at 0 */ BUG_ON(cursor->page_index >= cursor->page_count); @@ -1004,6 +1007,9 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, if (!bytes || cursor->offset & ~PAGE_MASK) return false; /* more bytes to process in the current page */ + if (!cursor->resid) + return false; /* no more data */ + /* Move on to the next page */ BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0676f2b199d..b0dfce77656 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -436,6 +436,7 @@ static bool osd_req_opcode_valid(u16 opcode) case CEPH_OSD_OP_OMAPCLEAR: case CEPH_OSD_OP_OMAPRMKEYS: case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_SETALLOCHINT: case CEPH_OSD_OP_CLONERANGE: case CEPH_OSD_OP_ASSERT_SRC_VERSION: case CEPH_OSD_OP_SRC_CMPXATTR: @@ -591,6 +592,26 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_watch_init); +void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, + unsigned int which, + u64 expected_object_size, + u64 expected_write_size) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + CEPH_OSD_OP_SETALLOCHINT); + + op->alloc_hint.expected_object_size = expected_object_size; + op->alloc_hint.expected_write_size = expected_write_size; + + /* + * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed + * not worth a feature bit. Set FAILOK per-op flag to make + * sure older osds don't trip over an unsupported opcode. + */ + op->flags |= CEPH_OSD_OP_FLAG_FAILOK; +} +EXPORT_SYMBOL(osd_req_op_alloc_hint_init); + static void ceph_osdc_msg_data_add(struct ceph_msg *msg, struct ceph_osd_data *osd_data) { @@ -681,6 +702,12 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->watch.ver = cpu_to_le64(src->watch.ver); dst->watch.flag = src->watch.flag; break; + case CEPH_OSD_OP_SETALLOCHINT: + dst->alloc_hint.expected_object_size = + cpu_to_le64(src->alloc_hint.expected_object_size); + dst->alloc_hint.expected_write_size = + cpu_to_le64(src->alloc_hint.expected_write_size); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -688,7 +715,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, return 0; } + dst->op = cpu_to_le16(src->op); + dst->flags = cpu_to_le32(src->flags); dst->payload_len = cpu_to_le32(src->payload_len); return request_data_len; @@ -1304,7 +1333,7 @@ static int __map_request(struct ceph_osd_client *osdc, { struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; - int o = -1, num = 0; + int num, o; int err; bool was_paused; @@ -1317,11 +1346,9 @@ static int __map_request(struct ceph_osd_client *osdc, } req->r_pgid = pgid; - err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); - if (err > 0) { - o = acting[0]; - num = err; - } + num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o); + if (num < 0) + num = 0; was_paused = req->r_paused; req->r_paused = __req_should_be_paused(osdc, req); @@ -2033,7 +2060,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) int skipped_map = 0; dout("taking full map %u len %d\n", epoch, maplen); - newmap = osdmap_decode(&p, p+maplen); + newmap = ceph_osdmap_decode(&p, p+maplen); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad; @@ -2082,7 +2109,6 @@ bad: pr_err("osdc handle_map corrupt msg\n"); ceph_msg_dump(msg); up_write(&osdc->map_sem); - return; } /* @@ -2281,7 +2307,6 @@ done_err: bad: pr_err("osdc handle_watch_notify corrupt msg\n"); - return; } /* diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index aade4a5c1c0..e632b5a52f5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -343,7 +343,7 @@ bad: /* * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid - * to a set of osds) + * to a set of osds) and primary_temp (explicit primary setting) */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { @@ -506,7 +506,7 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) kfree(pi); } -static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) +static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { u8 ev, cv; unsigned len, num; @@ -587,7 +587,7 @@ bad: return -EINVAL; } -static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) +static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; u32 num, len; @@ -633,6 +633,13 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) rb_erase(&pg->node, &map->pg_temp); kfree(pg); } + while (!RB_EMPTY_ROOT(&map->primary_temp)) { + struct ceph_pg_mapping *pg = + rb_entry(rb_first(&map->primary_temp), + struct ceph_pg_mapping, node); + rb_erase(&pg->node, &map->primary_temp); + kfree(pg); + } while (!RB_EMPTY_ROOT(&map->pg_pools)) { struct ceph_pg_pool_info *pi = rb_entry(rb_first(&map->pg_pools), @@ -642,186 +649,516 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kfree(map->osd_state); kfree(map->osd_weight); kfree(map->osd_addr); + kfree(map->osd_primary_affinity); kfree(map); } /* - * adjust max osd value. reallocate arrays. + * Adjust max_osd value, (re)allocate arrays. + * + * The new elements are properly initialized. */ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) { u8 *state; - struct ceph_entity_addr *addr; u32 *weight; + struct ceph_entity_addr *addr; + int i; - state = kcalloc(max, sizeof(*state), GFP_NOFS); - addr = kcalloc(max, sizeof(*addr), GFP_NOFS); - weight = kcalloc(max, sizeof(*weight), GFP_NOFS); - if (state == NULL || addr == NULL || weight == NULL) { + state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS); + weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS); + addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS); + if (!state || !weight || !addr) { kfree(state); - kfree(addr); kfree(weight); + kfree(addr); + return -ENOMEM; } - /* copy old? */ - if (map->osd_state) { - memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); - memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); - memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); - kfree(map->osd_state); - kfree(map->osd_addr); - kfree(map->osd_weight); + for (i = map->max_osd; i < max; i++) { + state[i] = 0; + weight[i] = CEPH_OSD_OUT; + memset(addr + i, 0, sizeof(*addr)); } map->osd_state = state; map->osd_weight = weight; map->osd_addr = addr; + + if (map->osd_primary_affinity) { + u32 *affinity; + + affinity = krealloc(map->osd_primary_affinity, + max*sizeof(*affinity), GFP_NOFS); + if (!affinity) + return -ENOMEM; + + for (i = map->max_osd; i < max; i++) + affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + + map->osd_primary_affinity = affinity; + } + map->max_osd = max; + return 0; } +#define OSDMAP_WRAPPER_COMPAT_VER 7 +#define OSDMAP_CLIENT_DATA_COMPAT_VER 1 + /* - * decode a full map. + * Return 0 or error. On success, *v is set to 0 for old (v6) osdmaps, + * to struct_v of the client_data section for new (v7 and above) + * osdmaps. */ -struct ceph_osdmap *osdmap_decode(void **p, void *end) +static int get_osdmap_client_data_v(void **p, void *end, + const char *prefix, u8 *v) { - struct ceph_osdmap *map; - u16 version; - u32 len, max, i; - int err = -EINVAL; - void *start = *p; - struct ceph_pg_pool_info *pi; + u8 struct_v; + + ceph_decode_8_safe(p, end, struct_v, e_inval); + if (struct_v >= 7) { + u8 struct_compat; + + ceph_decode_8_safe(p, end, struct_compat, e_inval); + if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) { + pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n", + struct_v, struct_compat, + OSDMAP_WRAPPER_COMPAT_VER, prefix); + return -EINVAL; + } + *p += 4; /* ignore wrapper struct_len */ + + ceph_decode_8_safe(p, end, struct_v, e_inval); + ceph_decode_8_safe(p, end, struct_compat, e_inval); + if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) { + pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n", + struct_v, struct_compat, + OSDMAP_CLIENT_DATA_COMPAT_VER, prefix); + return -EINVAL; + } + *p += 4; /* ignore client data struct_len */ + } else { + u16 version; + + *p -= 1; + ceph_decode_16_safe(p, end, version, e_inval); + if (version < 6) { + pr_warning("got v %d < 6 of %s ceph_osdmap\n", version, + prefix); + return -EINVAL; + } - dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); + /* old osdmap enconding */ + struct_v = 0; + } - map = kzalloc(sizeof(*map), GFP_NOFS); - if (map == NULL) - return ERR_PTR(-ENOMEM); - map->pg_temp = RB_ROOT; + *v = struct_v; + return 0; - ceph_decode_16_safe(p, end, version, bad); - if (version > 6) { - pr_warning("got unknown v %d > 6 of osdmap\n", version); - goto bad; +e_inval: + return -EINVAL; +} + +static int __decode_pools(void **p, void *end, struct ceph_osdmap *map, + bool incremental) +{ + u32 n; + + ceph_decode_32_safe(p, end, n, e_inval); + while (n--) { + struct ceph_pg_pool_info *pi; + u64 pool; + int ret; + + ceph_decode_64_safe(p, end, pool, e_inval); + + pi = __lookup_pg_pool(&map->pg_pools, pool); + if (!incremental || !pi) { + pi = kzalloc(sizeof(*pi), GFP_NOFS); + if (!pi) + return -ENOMEM; + + pi->id = pool; + + ret = __insert_pg_pool(&map->pg_pools, pi); + if (ret) { + kfree(pi); + return ret; + } + } + + ret = decode_pool(p, end, pi); + if (ret) + return ret; } - if (version < 6) { - pr_warning("got old v %d < 6 of osdmap\n", version); - goto bad; + + return 0; + +e_inval: + return -EINVAL; +} + +static int decode_pools(void **p, void *end, struct ceph_osdmap *map) +{ + return __decode_pools(p, end, map, false); +} + +static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map) +{ + return __decode_pools(p, end, map, true); +} + +static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map, + bool incremental) +{ + u32 n; + + ceph_decode_32_safe(p, end, n, e_inval); + while (n--) { + struct ceph_pg pgid; + u32 len, i; + int ret; + + ret = ceph_decode_pgid(p, end, &pgid); + if (ret) + return ret; + + ceph_decode_32_safe(p, end, len, e_inval); + + ret = __remove_pg_mapping(&map->pg_temp, pgid); + BUG_ON(!incremental && ret != -ENOENT); + + if (!incremental || len > 0) { + struct ceph_pg_mapping *pg; + + ceph_decode_need(p, end, len*sizeof(u32), e_inval); + + if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) + return -EINVAL; + + pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS); + if (!pg) + return -ENOMEM; + + pg->pgid = pgid; + pg->pg_temp.len = len; + for (i = 0; i < len; i++) + pg->pg_temp.osds[i] = ceph_decode_32(p); + + ret = __insert_pg_mapping(pg, &map->pg_temp); + if (ret) { + kfree(pg); + return ret; + } + } } - ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad); + return 0; + +e_inval: + return -EINVAL; +} + +static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map) +{ + return __decode_pg_temp(p, end, map, false); +} + +static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map) +{ + return __decode_pg_temp(p, end, map, true); +} + +static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map, + bool incremental) +{ + u32 n; + + ceph_decode_32_safe(p, end, n, e_inval); + while (n--) { + struct ceph_pg pgid; + u32 osd; + int ret; + + ret = ceph_decode_pgid(p, end, &pgid); + if (ret) + return ret; + + ceph_decode_32_safe(p, end, osd, e_inval); + + ret = __remove_pg_mapping(&map->primary_temp, pgid); + BUG_ON(!incremental && ret != -ENOENT); + + if (!incremental || osd != (u32)-1) { + struct ceph_pg_mapping *pg; + + pg = kzalloc(sizeof(*pg), GFP_NOFS); + if (!pg) + return -ENOMEM; + + pg->pgid = pgid; + pg->primary_temp.osd = osd; + + ret = __insert_pg_mapping(pg, &map->primary_temp); + if (ret) { + kfree(pg); + return ret; + } + } + } + + return 0; + +e_inval: + return -EINVAL; +} + +static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map) +{ + return __decode_primary_temp(p, end, map, false); +} + +static int decode_new_primary_temp(void **p, void *end, + struct ceph_osdmap *map) +{ + return __decode_primary_temp(p, end, map, true); +} + +u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd) +{ + BUG_ON(osd >= map->max_osd); + + if (!map->osd_primary_affinity) + return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + + return map->osd_primary_affinity[osd]; +} + +static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) +{ + BUG_ON(osd >= map->max_osd); + + if (!map->osd_primary_affinity) { + int i; + + map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32), + GFP_NOFS); + if (!map->osd_primary_affinity) + return -ENOMEM; + + for (i = 0; i < map->max_osd; i++) + map->osd_primary_affinity[i] = + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + } + + map->osd_primary_affinity[osd] = aff; + + return 0; +} + +static int decode_primary_affinity(void **p, void *end, + struct ceph_osdmap *map) +{ + u32 len, i; + + ceph_decode_32_safe(p, end, len, e_inval); + if (len == 0) { + kfree(map->osd_primary_affinity); + map->osd_primary_affinity = NULL; + return 0; + } + if (len != map->max_osd) + goto e_inval; + + ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval); + + for (i = 0; i < map->max_osd; i++) { + int ret; + + ret = set_primary_affinity(map, i, ceph_decode_32(p)); + if (ret) + return ret; + } + + return 0; + +e_inval: + return -EINVAL; +} + +static int decode_new_primary_affinity(void **p, void *end, + struct ceph_osdmap *map) +{ + u32 n; + + ceph_decode_32_safe(p, end, n, e_inval); + while (n--) { + u32 osd, aff; + int ret; + + ceph_decode_32_safe(p, end, osd, e_inval); + ceph_decode_32_safe(p, end, aff, e_inval); + + ret = set_primary_affinity(map, osd, aff); + if (ret) + return ret; + + pr_info("osd%d primary-affinity 0x%x\n", osd, aff); + } + + return 0; + +e_inval: + return -EINVAL; +} + +/* + * decode a full map. + */ +static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) +{ + u8 struct_v; + u32 epoch = 0; + void *start = *p; + u32 max; + u32 len, i; + int err; + + dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p)); + + err = get_osdmap_client_data_v(p, end, "full", &struct_v); + if (err) + goto bad; + + /* fsid, epoch, created, modified */ + ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) + + sizeof(map->created) + sizeof(map->modified), e_inval); ceph_decode_copy(p, &map->fsid, sizeof(map->fsid)); - map->epoch = ceph_decode_32(p); + epoch = map->epoch = ceph_decode_32(p); ceph_decode_copy(p, &map->created, sizeof(map->created)); ceph_decode_copy(p, &map->modified, sizeof(map->modified)); - ceph_decode_32_safe(p, end, max, bad); - while (max--) { - ceph_decode_need(p, end, 8 + 2, bad); - err = -ENOMEM; - pi = kzalloc(sizeof(*pi), GFP_NOFS); - if (!pi) - goto bad; - pi->id = ceph_decode_64(p); - err = __decode_pool(p, end, pi); - if (err < 0) { - kfree(pi); - goto bad; - } - __insert_pg_pool(&map->pg_pools, pi); - } + /* pools */ + err = decode_pools(p, end, map); + if (err) + goto bad; - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); + /* pool_name */ + err = decode_pool_names(p, end, map); + if (err) goto bad; - } - ceph_decode_32_safe(p, end, map->pool_max, bad); + ceph_decode_32_safe(p, end, map->pool_max, e_inval); - ceph_decode_32_safe(p, end, map->flags, bad); + ceph_decode_32_safe(p, end, map->flags, e_inval); - max = ceph_decode_32(p); + /* max_osd */ + ceph_decode_32_safe(p, end, max, e_inval); /* (re)alloc osd arrays */ err = osdmap_set_max_osd(map, max); - if (err < 0) + if (err) goto bad; - dout("osdmap_decode max_osd = %d\n", map->max_osd); - /* osds */ - err = -EINVAL; + /* osd_state, osd_weight, osd_addrs->client_addr */ ceph_decode_need(p, end, 3*sizeof(u32) + map->max_osd*(1 + sizeof(*map->osd_weight) + - sizeof(*map->osd_addr)), bad); - *p += 4; /* skip length field (should match max) */ + sizeof(*map->osd_addr)), e_inval); + + if (ceph_decode_32(p) != map->max_osd) + goto e_inval; + ceph_decode_copy(p, map->osd_state, map->max_osd); - *p += 4; /* skip length field (should match max) */ + if (ceph_decode_32(p) != map->max_osd) + goto e_inval; + for (i = 0; i < map->max_osd; i++) map->osd_weight[i] = ceph_decode_32(p); - *p += 4; /* skip length field (should match max) */ + if (ceph_decode_32(p) != map->max_osd) + goto e_inval; + ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr)); for (i = 0; i < map->max_osd; i++) ceph_decode_addr(&map->osd_addr[i]); /* pg_temp */ - ceph_decode_32_safe(p, end, len, bad); - for (i = 0; i < len; i++) { - int n, j; - struct ceph_pg pgid; - struct ceph_pg_mapping *pg; + err = decode_pg_temp(p, end, map); + if (err) + goto bad; - err = ceph_decode_pgid(p, end, &pgid); + /* primary_temp */ + if (struct_v >= 1) { + err = decode_primary_temp(p, end, map); if (err) goto bad; - ceph_decode_need(p, end, sizeof(u32), bad); - n = ceph_decode_32(p); - err = -EINVAL; - if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) - goto bad; - ceph_decode_need(p, end, n * sizeof(u32), bad); - err = -ENOMEM; - pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); - if (!pg) - goto bad; - pg->pgid = pgid; - pg->len = n; - for (j = 0; j < n; j++) - pg->osds[j] = ceph_decode_32(p); + } - err = __insert_pg_mapping(pg, &map->pg_temp); + /* primary_affinity */ + if (struct_v >= 2) { + err = decode_primary_affinity(p, end, map); if (err) goto bad; - dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, - len); + } else { + /* XXX can this happen? */ + kfree(map->osd_primary_affinity); + map->osd_primary_affinity = NULL; } /* crush */ - ceph_decode_32_safe(p, end, len, bad); - dout("osdmap_decode crush len %d from off 0x%x\n", len, - (int)(*p - start)); - ceph_decode_need(p, end, len, bad); - map->crush = crush_decode(*p, end); - *p += len; + ceph_decode_32_safe(p, end, len, e_inval); + map->crush = crush_decode(*p, min(*p + len, end)); if (IS_ERR(map->crush)) { err = PTR_ERR(map->crush); map->crush = NULL; goto bad; } + *p += len; - /* ignore the rest of the map */ + /* ignore the rest */ *p = end; - dout("osdmap_decode done %p %p\n", *p, end); - return map; + dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd); + return 0; +e_inval: + err = -EINVAL; bad: - dout("osdmap_decode fail err %d\n", err); - ceph_osdmap_destroy(map); - return ERR_PTR(err); + pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n", + err, epoch, (int)(*p - start), *p, start, end); + print_hex_dump(KERN_DEBUG, "osdmap: ", + DUMP_PREFIX_OFFSET, 16, 1, + start, end - start, true); + return err; +} + +/* + * Allocate and decode a full map. + */ +struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) +{ + struct ceph_osdmap *map; + int ret; + + map = kzalloc(sizeof(*map), GFP_NOFS); + if (!map) + return ERR_PTR(-ENOMEM); + + map->pg_temp = RB_ROOT; + map->primary_temp = RB_ROOT; + mutex_init(&map->crush_scratch_mutex); + + ret = osdmap_decode(p, end, map); + if (ret) { + ceph_osdmap_destroy(map); + return ERR_PTR(ret); + } + + return map; } /* @@ -840,17 +1177,18 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, __s64 new_pool_max; __s32 new_flags, max; void *start = *p; - int err = -EINVAL; - u16 version; + int err; + u8 struct_v; + + dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p)); - ceph_decode_16_safe(p, end, version, bad); - if (version != 6) { - pr_warning("got unknown v %d != 6 of inc osdmap\n", version); + err = get_osdmap_client_data_v(p, end, "inc", &struct_v); + if (err) goto bad; - } - ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32), - bad); + /* fsid, epoch, modified, new_pool_max, new_flags */ + ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) + + sizeof(u64) + sizeof(u32), e_inval); ceph_decode_copy(p, &fsid, sizeof(fsid)); epoch = ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); @@ -859,21 +1197,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, new_flags = ceph_decode_32(p); /* full map? */ - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); if (len > 0) { dout("apply_incremental full map len %d, %p to %p\n", len, *p, end); - return osdmap_decode(p, min(*p+len, end)); + return ceph_osdmap_decode(p, min(*p+len, end)); } /* new crush? */ - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); if (len > 0) { - dout("apply_incremental new crush map len %d, %p to %p\n", - len, *p, end); newcrush = crush_decode(*p, min(*p+len, end)); - if (IS_ERR(newcrush)) - return ERR_CAST(newcrush); + if (IS_ERR(newcrush)) { + err = PTR_ERR(newcrush); + newcrush = NULL; + goto bad; + } *p += len; } @@ -883,13 +1222,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (new_pool_max >= 0) map->pool_max = new_pool_max; - ceph_decode_need(p, end, 5*sizeof(u32), bad); - /* new max? */ - max = ceph_decode_32(p); + ceph_decode_32_safe(p, end, max, e_inval); if (max >= 0) { err = osdmap_set_max_osd(map, max); - if (err < 0) + if (err) goto bad; } @@ -902,51 +1239,34 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, newcrush = NULL; } - /* new_pool */ - ceph_decode_32_safe(p, end, len, bad); - while (len--) { - struct ceph_pg_pool_info *pi; + /* new_pools */ + err = decode_new_pools(p, end, map); + if (err) + goto bad; - ceph_decode_64_safe(p, end, pool, bad); - pi = __lookup_pg_pool(&map->pg_pools, pool); - if (!pi) { - pi = kzalloc(sizeof(*pi), GFP_NOFS); - if (!pi) { - err = -ENOMEM; - goto bad; - } - pi->id = pool; - __insert_pg_pool(&map->pg_pools, pi); - } - err = __decode_pool(p, end, pi); - if (err < 0) - goto bad; - } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) - goto bad; - } + /* new_pool_names */ + err = decode_pool_names(p, end, map); + if (err) + goto bad; /* old_pool */ - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_64_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, e_inval); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); } /* new_up */ - err = -EINVAL; - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); while (len--) { u32 osd; struct ceph_entity_addr addr; - ceph_decode_32_safe(p, end, osd, bad); - ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad); + ceph_decode_32_safe(p, end, osd, e_inval); + ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval); ceph_decode_addr(&addr); pr_info("osd%d up\n", osd); BUG_ON(osd >= map->max_osd); @@ -955,11 +1275,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_state */ - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); while (len--) { u32 osd; u8 xorstate; - ceph_decode_32_safe(p, end, osd, bad); + ceph_decode_32_safe(p, end, osd, e_inval); xorstate = **(u8 **)p; (*p)++; /* clean flag */ if (xorstate == 0) @@ -971,10 +1291,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_weight */ - ceph_decode_32_safe(p, end, len, bad); + ceph_decode_32_safe(p, end, len, e_inval); while (len--) { u32 osd, off; - ceph_decode_need(p, end, sizeof(u32)*2, bad); + ceph_decode_need(p, end, sizeof(u32)*2, e_inval); osd = ceph_decode_32(p); off = ceph_decode_32(p); pr_info("osd%d weight 0x%x %s\n", osd, off, @@ -985,56 +1305,35 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_pg_temp */ - ceph_decode_32_safe(p, end, len, bad); - while (len--) { - struct ceph_pg_mapping *pg; - int j; - struct ceph_pg pgid; - u32 pglen; + err = decode_new_pg_temp(p, end, map); + if (err) + goto bad; - err = ceph_decode_pgid(p, end, &pgid); + /* new_primary_temp */ + if (struct_v >= 1) { + err = decode_new_primary_temp(p, end, map); if (err) goto bad; - ceph_decode_need(p, end, sizeof(u32), bad); - pglen = ceph_decode_32(p); - if (pglen) { - ceph_decode_need(p, end, pglen*sizeof(u32), bad); - - /* removing existing (if any) */ - (void) __remove_pg_mapping(&map->pg_temp, pgid); + } - /* insert */ - err = -EINVAL; - if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) - goto bad; - err = -ENOMEM; - pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); - if (!pg) - goto bad; - pg->pgid = pgid; - pg->len = pglen; - for (j = 0; j < pglen; j++) - pg->osds[j] = ceph_decode_32(p); - err = __insert_pg_mapping(pg, &map->pg_temp); - if (err) { - kfree(pg); - goto bad; - } - dout(" added pg_temp %lld.%x len %d\n", pgid.pool, - pgid.seed, pglen); - } else { - /* remove */ - __remove_pg_mapping(&map->pg_temp, pgid); - } + /* new_primary_affinity */ + if (struct_v >= 2) { + err = decode_new_primary_affinity(p, end, map); + if (err) + goto bad; } /* ignore the rest */ *p = end; + + dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd); return map; +e_inval: + err = -EINVAL; bad: - pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)\n", - epoch, (int)(*p - start), *p, start, end); + pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n", + err, epoch, (int)(*p - start), *p, start, end); print_hex_dump(KERN_DEBUG, "osdmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); @@ -1142,61 +1441,249 @@ int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, } EXPORT_SYMBOL(ceph_oloc_oid_to_pg); -static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x, - int *result, int result_max, - const __u32 *weight, int weight_max) +static int do_crush(struct ceph_osdmap *map, int ruleno, int x, + int *result, int result_max, + const __u32 *weight, int weight_max) { - int scratch[result_max * 3]; + int r; - return crush_do_rule(map, ruleno, x, result, result_max, - weight, weight_max, scratch); + BUG_ON(result_max > CEPH_PG_MAX_SIZE); + + mutex_lock(&map->crush_scratch_mutex); + r = crush_do_rule(map->crush, ruleno, x, result, result_max, + weight, weight_max, map->crush_scratch_ary); + mutex_unlock(&map->crush_scratch_mutex); + + return r; } /* - * Calculate raw osd vector for the given pgid. Return pointer to osd - * array, or NULL on failure. + * Calculate raw (crush) set for given pgid. + * + * Return raw set length, or error. */ -static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *osds, int *num) +static int pg_to_raw_osds(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pool, + struct ceph_pg pgid, u32 pps, int *osds) { - struct ceph_pg_mapping *pg; - struct ceph_pg_pool_info *pool; int ruleno; - int r; - u32 pps; + int len; - pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); - if (!pool) - return NULL; + /* crush */ + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); + if (ruleno < 0) { + pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, + pool->size); + return -ENOENT; + } - /* pg_temp? */ + len = do_crush(osdmap, ruleno, pps, osds, + min_t(int, pool->size, CEPH_PG_MAX_SIZE), + osdmap->osd_weight, osdmap->max_osd); + if (len < 0) { + pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", + len, ruleno, pgid.pool, pool->crush_ruleset, + pool->type, pool->size); + return len; + } + + return len; +} + +/* + * Given raw set, calculate up set and up primary. + * + * Return up set length. *primary is set to up primary osd id, or -1 + * if up set is empty. + */ +static int raw_to_up_osds(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pool, + int *osds, int len, int *primary) +{ + int up_primary = -1; + int i; + + if (ceph_can_shift_osds(pool)) { + int removed = 0; + + for (i = 0; i < len; i++) { + if (ceph_osd_is_down(osdmap, osds[i])) { + removed++; + continue; + } + if (removed) + osds[i - removed] = osds[i]; + } + + len -= removed; + if (len > 0) + up_primary = osds[0]; + } else { + for (i = len - 1; i >= 0; i--) { + if (ceph_osd_is_down(osdmap, osds[i])) + osds[i] = CRUSH_ITEM_NONE; + else + up_primary = osds[i]; + } + } + + *primary = up_primary; + return len; +} + +static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, + struct ceph_pg_pool_info *pool, + int *osds, int len, int *primary) +{ + int i; + int pos = -1; + + /* + * Do we have any non-default primary_affinity values for these + * osds? + */ + if (!osdmap->osd_primary_affinity) + return; + + for (i = 0; i < len; i++) { + if (osds[i] != CRUSH_ITEM_NONE && + osdmap->osd_primary_affinity[i] != + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) { + break; + } + } + if (i == len) + return; + + /* + * Pick the primary. Feed both the seed (for the pg) and the + * osd into the hash/rng so that a proportional fraction of an + * osd's pgs get rejected as primary. + */ + for (i = 0; i < len; i++) { + int osd; + u32 aff; + + osd = osds[i]; + if (osd == CRUSH_ITEM_NONE) + continue; + + aff = osdmap->osd_primary_affinity[osd]; + if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY && + (crush_hash32_2(CRUSH_HASH_RJENKINS1, + pps, osd) >> 16) >= aff) { + /* + * We chose not to use this primary. Note it + * anyway as a fallback in case we don't pick + * anyone else, but keep looking. + */ + if (pos < 0) + pos = i; + } else { + pos = i; + break; + } + } + if (pos < 0) + return; + + *primary = osds[pos]; + + if (ceph_can_shift_osds(pool) && pos > 0) { + /* move the new primary to the front */ + for (i = pos; i > 0; i--) + osds[i] = osds[i - 1]; + osds[0] = *primary; + } +} + +/* + * Given up set, apply pg_temp and primary_temp mappings. + * + * Return acting set length. *primary is set to acting primary osd id, + * or -1 if acting set is empty. + */ +static int apply_temps(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pool, struct ceph_pg pgid, + int *osds, int len, int *primary) +{ + struct ceph_pg_mapping *pg; + int temp_len; + int temp_primary; + int i; + + /* raw_pg -> pg */ pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, pool->pg_num_mask); + + /* pg_temp? */ pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { - *num = pg->len; - return pg->osds; + temp_len = 0; + temp_primary = -1; + + for (i = 0; i < pg->pg_temp.len; i++) { + if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) { + if (ceph_can_shift_osds(pool)) + continue; + else + osds[temp_len++] = CRUSH_ITEM_NONE; + } else { + osds[temp_len++] = pg->pg_temp.osds[i]; + } + } + + /* apply pg_temp's primary */ + for (i = 0; i < temp_len; i++) { + if (osds[i] != CRUSH_ITEM_NONE) { + temp_primary = osds[i]; + break; + } + } + } else { + temp_len = len; + temp_primary = *primary; } - /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, - pool->type, pool->size); - if (ruleno < 0) { - pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", - pgid.pool, pool->crush_ruleset, pool->type, - pool->size); - return NULL; + /* primary_temp? */ + pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid); + if (pg) + temp_primary = pg->primary_temp.osd; + + *primary = temp_primary; + return temp_len; +} + +/* + * Calculate acting set for given pgid. + * + * Return acting set length, or error. *primary is set to acting + * primary osd id, or -1 if acting set is empty or on error. + */ +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *osds, int *primary) +{ + struct ceph_pg_pool_info *pool; + u32 pps; + int len; + + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); + if (!pool) { + *primary = -1; + return -ENOENT; } if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { - /* hash pool id and seed sothat pool PGs do not overlap */ + /* hash pool id and seed so that pool PGs do not overlap */ pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, ceph_stable_mod(pgid.seed, pool->pgp_num, pool->pgp_num_mask), pgid.pool); } else { /* - * legacy ehavior: add ps and pool together. this is + * legacy behavior: add ps and pool together. this is * not a great approach because the PGs from each pool * will overlap on top of each other: 0.5 == 1.4 == * 2.3 == ... @@ -1205,38 +1692,20 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, pool->pgp_num_mask) + (unsigned)pgid.pool; } - r = crush_do_rule_ary(osdmap->crush, ruleno, pps, - osds, min_t(int, pool->size, *num), - osdmap->osd_weight, osdmap->max_osd); - if (r < 0) { - pr_err("error %d from crush rule: pool %lld ruleset %d type %d" - " size %d\n", r, pgid.pool, pool->crush_ruleset, - pool->type, pool->size); - return NULL; + + len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds); + if (len < 0) { + *primary = -1; + return len; } - *num = r; - return osds; -} -/* - * Return acting set for given pgid. - */ -int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *acting) -{ - int rawosds[CEPH_PG_MAX_SIZE], *osds; - int i, o, num = CEPH_PG_MAX_SIZE; + len = raw_to_up_osds(osdmap, pool, osds, len, primary); - osds = calc_pg_raw(osdmap, pgid, rawosds, &num); - if (!osds) - return -1; + apply_primary_affinity(osdmap, pps, pool, osds, len, primary); - /* primary is first up osd */ - o = 0; - for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) - acting[o++] = osds[i]; - return o; + len = apply_temps(osdmap, pool, pgid, osds, len, primary); + + return len; } /* @@ -1244,17 +1713,11 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, */ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) { - int rawosds[CEPH_PG_MAX_SIZE], *osds; - int i, num = CEPH_PG_MAX_SIZE; + int osds[CEPH_PG_MAX_SIZE]; + int primary; - osds = calc_pg_raw(osdmap, pgid, rawosds, &num); - if (!osds) - return -1; + ceph_calc_pg_acting(osdmap, pgid, osds, &primary); - /* primary is first up osd */ - for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) - return osds[i]; - return -1; + return primary; } EXPORT_SYMBOL(ceph_calc_pg_primary); diff --git a/net/compat.c b/net/compat.c index f50161fb812..9a76eaf6318 100644 --- a/net/compat.c +++ b/net/compat.c @@ -384,8 +384,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname, return sock_setsockopt(sock, level, optname, optval, optlen); } -asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen) +COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, unsigned int, optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -504,8 +504,8 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta } EXPORT_SYMBOL(compat_sock_get_timestampns); -asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -735,15 +735,15 @@ static unsigned char nas[21] = { }; #undef AL -asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) +COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags) +COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; @@ -751,28 +751,28 @@ asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) +COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags) +COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, - unsigned int flags, struct sockaddr __user *addr, - int __user *addrlen) +COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, + unsigned int, flags, struct sockaddr __user *, addr, + int __user *, addrlen) { return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags, - struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct compat_timespec __user *, timeout) { int datagrams; struct timespec ktspec; @@ -795,7 +795,7 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, return datagrams; } -asmlinkage long compat_sys_socketcall(int call, u32 __user *args) +COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { int ret; u32 a[6]; diff --git a/net/core/Makefile b/net/core/Makefile index 9628c20acff..826b925aa45 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,5 +21,6 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o +obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/dev.c b/net/core/dev.c index 3721db71635..5b3042e69f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); if (ret) clear_bit(__LINK_STATE_START, &dev->state); @@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head) might_sleep(); list_for_each_entry(dev, head, close_list) { + /* Temporarily disable netpoll until the interface is down */ + netpoll_poll_disable(dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; net_dmaengine_put(); + netpoll_poll_enable(dev); } return 0; @@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); - /* Temporarily disable netpoll until the interface is down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); - netpoll_rx_enable(dev); return retval; } @@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev) if (dev->flags & IFF_UP) { LIST_HEAD(single); - /* Block netpoll rx while the interface is going down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); - - netpoll_rx_enable(dev); } return 0; } @@ -1645,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static inline bool is_skb_forwardable(struct net_device *dev, - struct sk_buff *skb) +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { unsigned int len; @@ -1665,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev, return false; } +EXPORT_SYMBOL_GPL(is_skb_forwardable); /** * dev_forward_skb - loopback an skb to another netif @@ -2286,10 +2281,10 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; + int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { @@ -2313,6 +2308,8 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + *depth = vlan_depth; + return type; } @@ -2326,12 +2323,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2420,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2495,34 +2493,38 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - netdev_features_t features) + const struct net_device *dev, + netdev_features_t features) { + int tmp; + if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(skb->dev, skb)) { + } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_features(struct sk_buff *skb) +netdev_features_t netif_skb_dev_features(struct sk_buff *skb, + const struct net_device *dev) { __be16 protocol = skb->protocol; - netdev_features_t features = skb->dev->features; + netdev_features_t features = dev->features; - if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, features); + return harmonize_features(skb, dev, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2530,9 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, features); + return harmonize_features(skb, dev, features); } -EXPORT_SYMBOL(netif_skb_features); +EXPORT_SYMBOL(netif_skb_dev_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) @@ -2803,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) +static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -2878,6 +2880,7 @@ recursion_alert: rc = -ENETDOWN; rcu_read_unlock_bh(); + atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return rc; out: @@ -2950,7 +2953,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb->rxhash & flow_table->mask; + flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -2984,6 +2987,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u16 tcpu; + u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -3012,7 +3016,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + hash = skb_get_hash(skb); + if (!hash) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3021,11 +3026,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u16 next_cpu; struct rps_dev_flow *rflow; - rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[skb->rxhash & - sock_flow_table->mask]; + next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; /* * If the desired CPU (where last recvmsg was done) is @@ -3054,7 +3058,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) hash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; @@ -3229,10 +3233,6 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); @@ -3439,7 +3439,7 @@ out: * @rx_handler: receive handler to register * @rx_handler_data: data pointer that is used by rx handler * - * Register a receive hander for a device. This handler will then be + * Register a receive handler for a device. This handler will then be * called from __netif_receive_skb. A negative errno code is returned * on a failure. * @@ -3493,11 +3493,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); static bool skb_pfmemalloc_protocol(struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_ARP): - case __constant_htons(ETH_P_IP): - case __constant_htons(ETH_P_IPV6): - case __constant_htons(ETH_P_8021Q): - case __constant_htons(ETH_P_8021AD): + case htons(ETH_P_ARP): + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): return true; default: return false; @@ -3518,10 +3518,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) trace_netif_receive_skb(skb); - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - goto out; - orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3648,7 +3644,6 @@ drop: unlock: rcu_read_unlock(); -out: return ret; } @@ -3838,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + skb_mac_header(skb)); else if (!diffs) diffs = memcmp(skb_mac_header(p), - skb_gro_mac_header(skb), + skb_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } @@ -3864,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb) } } +static void gro_pull_from_frag0(struct sk_buff *skb, int grow) +{ + struct skb_shared_info *pinfo = skb_shinfo(skb); + + BUG_ON(skb->end - skb->tail < grow); + + memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); + + skb->data_len -= grow; + skb->tail += grow; + + pinfo->frags[0].page_offset += grow; + skb_frag_size_sub(&pinfo->frags[0], grow); + + if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { + skb_frag_unref(skb, 0); + memmove(pinfo->frags, pinfo->frags + 1, + --pinfo->nr_frags * sizeof(pinfo->frags[0])); + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3872,14 +3888,14 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff struct list_head *head = &offload_base; int same_flow; enum gro_result ret; + int grow; - if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) + if (!(skb->dev->features & NETIF_F_GRO)) goto normal; if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; - skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ @@ -3943,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ret = GRO_HELD; pull: - if (skb_headlen(skb) < skb_gro_offset(skb)) { - int grow = skb_gro_offset(skb) - skb_headlen(skb); - - BUG_ON(skb->end - skb->tail < grow); - - memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); - - skb->tail += grow; - skb->data_len -= grow; - - skb_shinfo(skb)->frags[0].page_offset += grow; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - - if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { - skb_frag_unref(skb, 0); - memmove(skb_shinfo(skb)->frags, - skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); - } - } - + grow = skb_gro_offset(skb) - skb_headlen(skb); + if (grow > 0) + gro_pull_from_frag0(skb, grow); ok: return ret; @@ -4031,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { trace_napi_gro_receive_entry(skb); + skb_gro_reset_offset(skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -4043,6 +4043,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; } @@ -4059,12 +4060,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, - gro_result_t ret) +static gro_result_t napi_frags_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) + case GRO_HELD: + __skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4073,7 +4078,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; - case GRO_HELD: case GRO_MERGED: break; } @@ -4081,17 +4085,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * return ret; } +/* Upper GRO stack assumes network header starts at gro_offset=0 + * Drivers could call both napi_gro_frags() and napi_gro_receive() + * We copy ethernet header into skb->data to have a common layout. + */ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; + const struct ethhdr *eth; + unsigned int hlen = sizeof(*eth); napi->skb = NULL; - if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { - napi_reuse_skb(napi, skb); - return NULL; + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header_fast(skb, 0); + if (unlikely(skb_gro_header_hard(skb, hlen))) { + eth = skb_gro_header_slow(skb, hlen, 0); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + return NULL; + } + } else { + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; + NAPI_GRO_CB(skb)->frag0_len -= hlen; } - skb->protocol = eth_type_trans(skb, skb->dev); + __skb_pull(skb, hlen); + + /* + * This works because the only protocols we care about don't require + * special handling. + * We'll fix it up properly in napi_frags_finish() + */ + skb->protocol = eth->h_proto; return skb; } @@ -4128,8 +4156,8 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) struct softnet_data *next = remsd->rps_ipi_next; if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); + smp_call_function_single_async(remsd->cpu, + &remsd->csd); remsd = next; } } else @@ -4561,8 +4589,7 @@ void *netdev_lower_get_next_private(struct net_device *dev, if (&lower->list == &dev->adj_list.lower) return NULL; - if (iter) - *iter = lower->list.next; + *iter = lower->list.next; return lower->private; } @@ -4590,8 +4617,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, if (&lower->list == &dev->adj_list.lower) return NULL; - if (iter) - *iter = &lower->list; + *iter = &lower->list; return lower->private; } @@ -4637,7 +4663,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); -int netdev_adjacent_sysfs_add(struct net_device *dev, +static int netdev_adjacent_sysfs_add(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list) { @@ -4647,7 +4673,7 @@ int netdev_adjacent_sysfs_add(struct net_device *dev, return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), linkname); } -void netdev_adjacent_sysfs_del(struct net_device *dev, +static void netdev_adjacent_sysfs_del(struct net_device *dev, char *name, struct list_head *dev_list) { @@ -5669,6 +5695,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } +#ifdef CONFIG_NET_RX_BUSY_POLL + if (dev->netdev_ops->ndo_busy_poll) + features |= NETIF_F_BUSY_POLL; + else +#endif + features &= ~NETIF_F_BUSY_POLL; + return features; } @@ -6244,6 +6277,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, &dev->stats); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/dst.c b/net/core/dst.c index ca4231ec734..80d6286c8b6 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -142,12 +142,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard(struct sk_buff *skb) +int dst_discard_sk(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard); +EXPORT_SYMBOL(dst_discard_sk); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard; + dst->output = dst_discard_sk; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst) /* The first case (dev==NULL) is required, when protocol module is unloaded. */ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) - dst->input = dst->output = dst_discard; + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + dst->input = dst_discard; + dst->output = dst_discard_sk; + } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst->output = dst_discard; + dst->input = dst_discard; + dst->output = dst_discard_sk; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 30071dec287..640ba0e5831 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXFCS_BIT] = "rx-fcs", [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", + [NETIF_F_BUSY_POLL_BIT] = "busy-poll", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index f409e0bd35c..185c341fafb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, attach_rules(&ops->rules_list, dev); break; + case NETDEV_CHANGENAME: + list_for_each_entry(ops, &net->rules_ops, list) { + detach_rules(&ops->rules_list, dev); + attach_rules(&ops->rules_list, dev); + } + break; + case NETDEV_UNREGISTER: list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); diff --git a/net/core/filter.c b/net/core/filter.c index ad30d626a5b..cd58614660c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1,11 +1,16 @@ /* * Linux Socket Filter - Kernel level socket filtering * - * Author: - * Jay Schulist <jschlst@samba.org> + * Based on the design of the Berkeley Packet Filter. The new + * internal format has been designed by PLUMgrid: * - * Based on the design of: - * - The Berkeley Packet Filter + * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com + * + * Authors: + * + * Jay Schulist <jschlst@samba.org> + * Alexei Starovoitov <ast@plumgrid.com> + * Daniel Borkmann <dborkman@redhat.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -108,304 +113,1051 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); +/* Base function for offset calculation. Needs to go into .text section, + * therefore keeping it non-static as well; will also be used by JITs + * anyway later on, so do not let the compiler omit it. + */ +noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return 0; +} + /** - * sk_run_filter - run a filter on a socket - * @skb: buffer to run the filter on - * @fentry: filter to apply + * __sk_run_filter - run a filter on a given context + * @ctx: buffer to run the filter on + * @insn: filter to apply * - * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. @skb is the data we are - * filtering, @filter is the array of filter instructions. - * Because all jumps are guaranteed to be before last instruction, - * and last instruction guaranteed to be a RET, we dont need to check - * flen. (We used to pass to this function the length of filter) + * Decode and apply filter instructions to the skb->data. Return length to + * keep, 0 for none. @ctx is the data we are operating on, @insn is the + * array of filter instructions. */ -unsigned int sk_run_filter(const struct sk_buff *skb, - const struct sock_filter *fentry) +unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { + u64 stack[MAX_BPF_STACK / sizeof(u64)]; + u64 regs[MAX_BPF_REG], tmp; void *ptr; - u32 A = 0; /* Accumulator */ - u32 X = 0; /* Index Register */ - u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ - u32 tmp; - int k; + int off; - /* - * Process array of filter instructions. - */ - for (;; fentry++) { -#if defined(CONFIG_X86_32) -#define K (fentry->k) -#else - const u32 K = fentry->k; -#endif - - switch (fentry->code) { - case BPF_S_ALU_ADD_X: - A += X; - continue; - case BPF_S_ALU_ADD_K: - A += K; - continue; - case BPF_S_ALU_SUB_X: - A -= X; - continue; - case BPF_S_ALU_SUB_K: - A -= K; - continue; - case BPF_S_ALU_MUL_X: - A *= X; - continue; - case BPF_S_ALU_MUL_K: - A *= K; - continue; - case BPF_S_ALU_DIV_X: - if (X == 0) - return 0; - A /= X; - continue; - case BPF_S_ALU_DIV_K: - A /= K; - continue; - case BPF_S_ALU_MOD_X: - if (X == 0) - return 0; - A %= X; - continue; - case BPF_S_ALU_MOD_K: - A %= K; - continue; - case BPF_S_ALU_AND_X: - A &= X; - continue; - case BPF_S_ALU_AND_K: - A &= K; - continue; - case BPF_S_ALU_OR_X: - A |= X; - continue; - case BPF_S_ALU_OR_K: - A |= K; - continue; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: - A ^= X; - continue; - case BPF_S_ALU_XOR_K: - A ^= K; - continue; - case BPF_S_ALU_LSH_X: - A <<= X; - continue; - case BPF_S_ALU_LSH_K: - A <<= K; - continue; - case BPF_S_ALU_RSH_X: - A >>= X; - continue; - case BPF_S_ALU_RSH_K: - A >>= K; - continue; - case BPF_S_ALU_NEG: - A = -A; - continue; - case BPF_S_JMP_JA: - fentry += K; - continue; - case BPF_S_JMP_JGT_K: - fentry += (A > K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGE_K: - fentry += (A >= K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JEQ_K: - fentry += (A == K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JSET_K: - fentry += (A & K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGT_X: - fentry += (A > X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGE_X: - fentry += (A >= X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JEQ_X: - fentry += (A == X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JSET_X: - fentry += (A & X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_LD_W_ABS: - k = K; -load_w: - ptr = load_pointer(skb, k, 4, &tmp); - if (ptr != NULL) { - A = get_unaligned_be32(ptr); - continue; - } +#define K insn->imm +#define A regs[insn->a_reg] +#define X regs[insn->x_reg] +#define R0 regs[0] + +#define CONT ({insn++; goto select_insn; }) +#define CONT_JMP ({insn++; goto select_insn; }) + + static const void *jumptable[256] = { + [0 ... 255] = &&default_label, + /* Now overwrite non-defaults ... */ +#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C + DL(BPF_ALU, BPF_ADD, BPF_X), + DL(BPF_ALU, BPF_ADD, BPF_K), + DL(BPF_ALU, BPF_SUB, BPF_X), + DL(BPF_ALU, BPF_SUB, BPF_K), + DL(BPF_ALU, BPF_AND, BPF_X), + DL(BPF_ALU, BPF_AND, BPF_K), + DL(BPF_ALU, BPF_OR, BPF_X), + DL(BPF_ALU, BPF_OR, BPF_K), + DL(BPF_ALU, BPF_LSH, BPF_X), + DL(BPF_ALU, BPF_LSH, BPF_K), + DL(BPF_ALU, BPF_RSH, BPF_X), + DL(BPF_ALU, BPF_RSH, BPF_K), + DL(BPF_ALU, BPF_XOR, BPF_X), + DL(BPF_ALU, BPF_XOR, BPF_K), + DL(BPF_ALU, BPF_MUL, BPF_X), + DL(BPF_ALU, BPF_MUL, BPF_K), + DL(BPF_ALU, BPF_MOV, BPF_X), + DL(BPF_ALU, BPF_MOV, BPF_K), + DL(BPF_ALU, BPF_DIV, BPF_X), + DL(BPF_ALU, BPF_DIV, BPF_K), + DL(BPF_ALU, BPF_MOD, BPF_X), + DL(BPF_ALU, BPF_MOD, BPF_K), + DL(BPF_ALU, BPF_NEG, 0), + DL(BPF_ALU, BPF_END, BPF_TO_BE), + DL(BPF_ALU, BPF_END, BPF_TO_LE), + DL(BPF_ALU64, BPF_ADD, BPF_X), + DL(BPF_ALU64, BPF_ADD, BPF_K), + DL(BPF_ALU64, BPF_SUB, BPF_X), + DL(BPF_ALU64, BPF_SUB, BPF_K), + DL(BPF_ALU64, BPF_AND, BPF_X), + DL(BPF_ALU64, BPF_AND, BPF_K), + DL(BPF_ALU64, BPF_OR, BPF_X), + DL(BPF_ALU64, BPF_OR, BPF_K), + DL(BPF_ALU64, BPF_LSH, BPF_X), + DL(BPF_ALU64, BPF_LSH, BPF_K), + DL(BPF_ALU64, BPF_RSH, BPF_X), + DL(BPF_ALU64, BPF_RSH, BPF_K), + DL(BPF_ALU64, BPF_XOR, BPF_X), + DL(BPF_ALU64, BPF_XOR, BPF_K), + DL(BPF_ALU64, BPF_MUL, BPF_X), + DL(BPF_ALU64, BPF_MUL, BPF_K), + DL(BPF_ALU64, BPF_MOV, BPF_X), + DL(BPF_ALU64, BPF_MOV, BPF_K), + DL(BPF_ALU64, BPF_ARSH, BPF_X), + DL(BPF_ALU64, BPF_ARSH, BPF_K), + DL(BPF_ALU64, BPF_DIV, BPF_X), + DL(BPF_ALU64, BPF_DIV, BPF_K), + DL(BPF_ALU64, BPF_MOD, BPF_X), + DL(BPF_ALU64, BPF_MOD, BPF_K), + DL(BPF_ALU64, BPF_NEG, 0), + DL(BPF_JMP, BPF_CALL, 0), + DL(BPF_JMP, BPF_JA, 0), + DL(BPF_JMP, BPF_JEQ, BPF_X), + DL(BPF_JMP, BPF_JEQ, BPF_K), + DL(BPF_JMP, BPF_JNE, BPF_X), + DL(BPF_JMP, BPF_JNE, BPF_K), + DL(BPF_JMP, BPF_JGT, BPF_X), + DL(BPF_JMP, BPF_JGT, BPF_K), + DL(BPF_JMP, BPF_JGE, BPF_X), + DL(BPF_JMP, BPF_JGE, BPF_K), + DL(BPF_JMP, BPF_JSGT, BPF_X), + DL(BPF_JMP, BPF_JSGT, BPF_K), + DL(BPF_JMP, BPF_JSGE, BPF_X), + DL(BPF_JMP, BPF_JSGE, BPF_K), + DL(BPF_JMP, BPF_JSET, BPF_X), + DL(BPF_JMP, BPF_JSET, BPF_K), + DL(BPF_JMP, BPF_EXIT, 0), + DL(BPF_STX, BPF_MEM, BPF_B), + DL(BPF_STX, BPF_MEM, BPF_H), + DL(BPF_STX, BPF_MEM, BPF_W), + DL(BPF_STX, BPF_MEM, BPF_DW), + DL(BPF_STX, BPF_XADD, BPF_W), + DL(BPF_STX, BPF_XADD, BPF_DW), + DL(BPF_ST, BPF_MEM, BPF_B), + DL(BPF_ST, BPF_MEM, BPF_H), + DL(BPF_ST, BPF_MEM, BPF_W), + DL(BPF_ST, BPF_MEM, BPF_DW), + DL(BPF_LDX, BPF_MEM, BPF_B), + DL(BPF_LDX, BPF_MEM, BPF_H), + DL(BPF_LDX, BPF_MEM, BPF_W), + DL(BPF_LDX, BPF_MEM, BPF_DW), + DL(BPF_LD, BPF_ABS, BPF_W), + DL(BPF_LD, BPF_ABS, BPF_H), + DL(BPF_LD, BPF_ABS, BPF_B), + DL(BPF_LD, BPF_IND, BPF_W), + DL(BPF_LD, BPF_IND, BPF_H), + DL(BPF_LD, BPF_IND, BPF_B), +#undef DL + }; + + regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + regs[ARG1_REG] = (u64) (unsigned long) ctx; + +select_insn: + goto *jumptable[insn->code]; + + /* ALU */ +#define ALU(OPCODE, OP) \ + BPF_ALU64_##OPCODE##_BPF_X: \ + A = A OP X; \ + CONT; \ + BPF_ALU_##OPCODE##_BPF_X: \ + A = (u32) A OP (u32) X; \ + CONT; \ + BPF_ALU64_##OPCODE##_BPF_K: \ + A = A OP K; \ + CONT; \ + BPF_ALU_##OPCODE##_BPF_K: \ + A = (u32) A OP (u32) K; \ + CONT; + + ALU(BPF_ADD, +) + ALU(BPF_SUB, -) + ALU(BPF_AND, &) + ALU(BPF_OR, |) + ALU(BPF_LSH, <<) + ALU(BPF_RSH, >>) + ALU(BPF_XOR, ^) + ALU(BPF_MUL, *) +#undef ALU + BPF_ALU_BPF_NEG_0: + A = (u32) -A; + CONT; + BPF_ALU64_BPF_NEG_0: + A = -A; + CONT; + BPF_ALU_BPF_MOV_BPF_X: + A = (u32) X; + CONT; + BPF_ALU_BPF_MOV_BPF_K: + A = (u32) K; + CONT; + BPF_ALU64_BPF_MOV_BPF_X: + A = X; + CONT; + BPF_ALU64_BPF_MOV_BPF_K: + A = K; + CONT; + BPF_ALU64_BPF_ARSH_BPF_X: + (*(s64 *) &A) >>= X; + CONT; + BPF_ALU64_BPF_ARSH_BPF_K: + (*(s64 *) &A) >>= K; + CONT; + BPF_ALU64_BPF_MOD_BPF_X: + if (unlikely(X == 0)) return 0; - case BPF_S_LD_H_ABS: - k = K; -load_h: - ptr = load_pointer(skb, k, 2, &tmp); - if (ptr != NULL) { - A = get_unaligned_be16(ptr); - continue; - } + tmp = A; + A = do_div(tmp, X); + CONT; + BPF_ALU_BPF_MOD_BPF_X: + if (unlikely(X == 0)) return 0; - case BPF_S_LD_B_ABS: - k = K; -load_b: - ptr = load_pointer(skb, k, 1, &tmp); - if (ptr != NULL) { - A = *(u8 *)ptr; - continue; - } + tmp = (u32) A; + A = do_div(tmp, (u32) X); + CONT; + BPF_ALU64_BPF_MOD_BPF_K: + tmp = A; + A = do_div(tmp, K); + CONT; + BPF_ALU_BPF_MOD_BPF_K: + tmp = (u32) A; + A = do_div(tmp, (u32) K); + CONT; + BPF_ALU64_BPF_DIV_BPF_X: + if (unlikely(X == 0)) return 0; - case BPF_S_LD_W_LEN: - A = skb->len; - continue; - case BPF_S_LDX_W_LEN: - X = skb->len; - continue; - case BPF_S_LD_W_IND: - k = X + K; - goto load_w; - case BPF_S_LD_H_IND: - k = X + K; - goto load_h; - case BPF_S_LD_B_IND: - k = X + K; - goto load_b; - case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, K, 1, &tmp); - if (ptr != NULL) { - X = (*(u8 *)ptr & 0xf) << 2; - continue; - } + do_div(A, X); + CONT; + BPF_ALU_BPF_DIV_BPF_X: + if (unlikely(X == 0)) return 0; - case BPF_S_LD_IMM: - A = K; - continue; - case BPF_S_LDX_IMM: - X = K; - continue; - case BPF_S_LD_MEM: - A = mem[K]; - continue; - case BPF_S_LDX_MEM: - X = mem[K]; - continue; - case BPF_S_MISC_TAX: - X = A; - continue; - case BPF_S_MISC_TXA: - A = X; - continue; - case BPF_S_RET_K: - return K; - case BPF_S_RET_A: - return A; - case BPF_S_ST: - mem[K] = A; - continue; - case BPF_S_STX: - mem[K] = X; - continue; - case BPF_S_ANC_PROTOCOL: - A = ntohs(skb->protocol); - continue; - case BPF_S_ANC_PKTTYPE: - A = skb->pkt_type; - continue; - case BPF_S_ANC_IFINDEX: - if (!skb->dev) - return 0; - A = skb->dev->ifindex; - continue; - case BPF_S_ANC_MARK: - A = skb->mark; - continue; - case BPF_S_ANC_QUEUE: - A = skb->queue_mapping; - continue; - case BPF_S_ANC_HATYPE: - if (!skb->dev) - return 0; - A = skb->dev->type; - continue; - case BPF_S_ANC_RXHASH: - A = skb->rxhash; - continue; - case BPF_S_ANC_CPU: - A = raw_smp_processor_id(); - continue; - case BPF_S_ANC_VLAN_TAG: - A = vlan_tx_tag_get(skb); - continue; - case BPF_S_ANC_VLAN_TAG_PRESENT: - A = !!vlan_tx_tag_present(skb); - continue; - case BPF_S_ANC_PAY_OFFSET: - A = __skb_get_poff(skb); - continue; - case BPF_S_ANC_NLATTR: { - struct nlattr *nla; - - if (skb_is_nonlinear(skb)) - return 0; - if (A > skb->len - sizeof(struct nlattr)) - return 0; - - nla = nla_find((struct nlattr *)&skb->data[A], - skb->len - A, X); - if (nla) - A = (void *)nla - (void *)skb->data; - else - A = 0; - continue; + tmp = (u32) A; + do_div(tmp, (u32) X); + A = (u32) tmp; + CONT; + BPF_ALU64_BPF_DIV_BPF_K: + do_div(A, K); + CONT; + BPF_ALU_BPF_DIV_BPF_K: + tmp = (u32) A; + do_div(tmp, (u32) K); + A = (u32) tmp; + CONT; + BPF_ALU_BPF_END_BPF_TO_BE: + switch (K) { + case 16: + A = (__force u16) cpu_to_be16(A); + break; + case 32: + A = (__force u32) cpu_to_be32(A); + break; + case 64: + A = (__force u64) cpu_to_be64(A); + break; + } + CONT; + BPF_ALU_BPF_END_BPF_TO_LE: + switch (K) { + case 16: + A = (__force u16) cpu_to_le16(A); + break; + case 32: + A = (__force u32) cpu_to_le32(A); + break; + case 64: + A = (__force u64) cpu_to_le64(A); + break; + } + CONT; + + /* CALL */ + BPF_JMP_BPF_CALL_0: + /* Function call scratches R1-R5 registers, preserves R6-R9, + * and stores return value into R0. + */ + R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], + regs[4], regs[5]); + CONT; + + /* JMP */ + BPF_JMP_BPF_JA_0: + insn += insn->off; + CONT; + BPF_JMP_BPF_JEQ_BPF_X: + if (A == X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JEQ_BPF_K: + if (A == K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JNE_BPF_X: + if (A != X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JNE_BPF_K: + if (A != K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGT_BPF_X: + if (A > X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGT_BPF_K: + if (A > K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGE_BPF_X: + if (A >= X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGE_BPF_K: + if (A >= K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGT_BPF_X: + if (((s64)A) > ((s64)X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGT_BPF_K: + if (((s64)A) > ((s64)K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGE_BPF_X: + if (((s64)A) >= ((s64)X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGE_BPF_K: + if (((s64)A) >= ((s64)K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSET_BPF_X: + if (A & X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSET_BPF_K: + if (A & K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_EXIT_0: + return R0; + + /* STX and ST and LDX*/ +#define LDST(SIZEOP, SIZE) \ + BPF_STX_BPF_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = X; \ + CONT; \ + BPF_ST_BPF_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = K; \ + CONT; \ + BPF_LDX_BPF_MEM_##SIZEOP: \ + A = *(SIZE *)(unsigned long) (X + insn->off); \ + CONT; + + LDST(BPF_B, u8) + LDST(BPF_H, u16) + LDST(BPF_W, u32) + LDST(BPF_DW, u64) +#undef LDST + BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ + atomic_add((u32) X, (atomic_t *)(unsigned long) + (A + insn->off)); + CONT; + BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ + atomic64_add((u64) X, (atomic64_t *)(unsigned long) + (A + insn->off)); + CONT; + BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ + off = K; +load_word: + /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only + * appearing in the programs where ctx == skb. All programs + * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() + * saves it in R6, internal BPF verifier will check that + * R6 == ctx. + * + * BPF_ABS and BPF_IND are wrappers of function calls, so + * they scratch R1-R5 registers, preserve R6-R9, and store + * return value into R0. + * + * Implicit input: + * ctx + * + * Explicit input: + * X == any register + * K == 32-bit immediate + * + * Output: + * R0 - 8/16/32-bit skb data converted to cpu endianness + */ + ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + if (likely(ptr != NULL)) { + R0 = get_unaligned_be32(ptr); + CONT; + } + return 0; + BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ + off = K; +load_half: + ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + if (likely(ptr != NULL)) { + R0 = get_unaligned_be16(ptr); + CONT; + } + return 0; + BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ + off = K; +load_byte: + ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + if (likely(ptr != NULL)) { + R0 = *(u8 *)ptr; + CONT; + } + return 0; + BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ + off = K + X; + goto load_word; + BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ + off = K + X; + goto load_half; + BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ + off = K + X; + goto load_byte; + + default_label: + /* If we ever reach this, we have a bug somewhere. */ + WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); + return 0; +#undef CONT_JMP +#undef CONT + +#undef R0 +#undef X +#undef A +#undef K +} + +u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, + const struct sock_filter_int *insni) + __attribute__ ((alias ("__sk_run_filter"))); + +u32 sk_run_filter_int_skb(const struct sk_buff *ctx, + const struct sock_filter_int *insni) + __attribute__ ((alias ("__sk_run_filter"))); +EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); + +/* Helper to find the offset of pkt_type in sk_buff structure. We want + * to make sure its still a 3bit field starting at a byte boundary; + * taken from arch/x86/net/bpf_jit_comp.c. + */ +#define PKT_TYPE_MAX 7 +static unsigned int pkt_type_offset(void) +{ + struct sk_buff skb_probe = { .pkt_type = ~0, }; + u8 *ct = (u8 *) &skb_probe; + unsigned int off; + + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (ct[off] == PKT_TYPE_MAX) + return off; + } + + pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); + return -1; +} + +static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + + return __skb_get_poff(skb); +} + +static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + + if (skb->len < sizeof(struct nlattr)) + return 0; + + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + if (nla) + return (void *) nla - (void *) skb->data; + + return 0; +} + +static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + + if (skb->len < sizeof(struct nlattr)) + return 0; + + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *) &skb->data[A]; + if (nla->nla_len > skb->len - A) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + return (void *) nla - (void *) skb->data; + + return 0; +} + +static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return raw_smp_processor_id(); +} + +/* Register mappings for user programs. */ +#define A_REG 0 +#define X_REG 7 +#define TMP_REG 8 +#define ARG2_REG 2 +#define ARG3_REG 3 + +static bool convert_bpf_extensions(struct sock_filter *fp, + struct sock_filter_int **insnp) +{ + struct sock_filter_int *insn = *insnp; + + switch (fp->k) { + case SKF_AD_OFF + SKF_AD_PROTOCOL: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, protocol); + insn++; + + /* A = ntohs(A) [emitting a nop or swap16] */ + insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; + insn->a_reg = A_REG; + insn->imm = 16; + break; + + case SKF_AD_OFF + SKF_AD_PKTTYPE: + insn->code = BPF_LDX | BPF_MEM | BPF_B; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = pkt_type_offset(); + if (insn->off < 0) + return false; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = PKT_TYPE_MAX; + break; + + case SKF_AD_OFF + SKF_AD_IFINDEX: + case SKF_AD_OFF + SKF_AD_HATYPE: + if (FIELD_SIZEOF(struct sk_buff, dev) == 8) + insn->code = BPF_LDX | BPF_MEM | BPF_DW; + else + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = TMP_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, dev); + insn++; + + insn->code = BPF_JMP | BPF_JNE | BPF_K; + insn->a_reg = TMP_REG; + insn->imm = 0; + insn->off = 1; + insn++; + + insn->code = BPF_JMP | BPF_EXIT; + insn++; + + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->off = offsetof(struct net_device, ifindex); + } else { + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->off = offsetof(struct net_device, type); } - case BPF_S_ANC_NLATTR_NEST: { - struct nlattr *nla; - - if (skb_is_nonlinear(skb)) - return 0; - if (A > skb->len - sizeof(struct nlattr)) - return 0; - - nla = (struct nlattr *)&skb->data[A]; - if (nla->nla_len > A - skb->len) - return 0; - - nla = nla_find_nested(nla, X); - if (nla) - A = (void *)nla - (void *)skb->data; - else - A = 0; - continue; + break; + + case SKF_AD_OFF + SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, mark); + break; + + case SKF_AD_OFF + SKF_AD_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, hash); + break; + + case SKF_AD_OFF + SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, queue_mapping); + break; + + case SKF_AD_OFF + SKF_AD_VLAN_TAG: + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, vlan_tci); + insn++; + + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = ~VLAN_TAG_PRESENT; + } else { + insn->code = BPF_ALU | BPF_RSH | BPF_K; + insn->a_reg = A_REG; + insn->imm = 12; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = 1; } -#ifdef CONFIG_SECCOMP_FILTER - case BPF_S_ANC_SECCOMP_LD_W: - A = seccomp_bpf_load(fentry->k); - continue; -#endif + break; + + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: + case SKF_AD_OFF + SKF_AD_NLATTR: + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: + case SKF_AD_OFF + SKF_AD_CPU: + /* arg1 = ctx */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG1_REG; + insn->x_reg = CTX_REG; + insn++; + + /* arg2 = A */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG2_REG; + insn->x_reg = A_REG; + insn++; + + /* arg3 = X */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG3_REG; + insn->x_reg = X_REG; + insn++; + + /* Emit call(ctx, arg2=A, arg3=X) */ + insn->code = BPF_JMP | BPF_CALL; + switch (fp->k) { + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: + insn->imm = __skb_get_pay_offset - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_NLATTR: + insn->imm = __skb_get_nlattr - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: + insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_CPU: + insn->imm = __get_raw_cpu_id - __bpf_call_base; + break; + } + break; + + case SKF_AD_OFF + SKF_AD_ALU_XOR_X: + insn->code = BPF_ALU | BPF_XOR | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + break; + + default: + /* This is just a dummy call to avoid letting the compiler + * evict __bpf_call_base() as an optimization. Placed here + * where no-one bothers. + */ + BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0); + return false; + } + + *insnp = insn; + return true; +} + +/** + * sk_convert_filter - convert filter program + * @prog: the user passed filter program + * @len: the length of the user passed filter program + * @new_prog: buffer where converted program will be stored + * @new_len: pointer to store length of converted program + * + * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. + * Conversion workflow: + * + * 1) First pass for calculating the new program length: + * sk_convert_filter(old_prog, old_len, NULL, &new_len) + * + * 2) 2nd pass to remap in two passes: 1st pass finds new + * jump offsets, 2nd pass remapping: + * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); + * sk_convert_filter(old_prog, old_len, new_prog, &new_len); + * + * User BPF's register A is mapped to our BPF register 6, user BPF + * register X is mapped to BPF register 7; frame pointer is always + * register 10; Context 'void *ctx' is stored in register 1, that is, + * for socket filters: ctx == 'struct sk_buff *', for seccomp: + * ctx == 'struct seccomp_data *'. + */ +int sk_convert_filter(struct sock_filter *prog, int len, + struct sock_filter_int *new_prog, int *new_len) +{ + int new_flen = 0, pass = 0, target, i; + struct sock_filter_int *new_insn; + struct sock_filter *fp; + int *addrs = NULL; + u8 bpf_src; + + BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); + BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + + if (len <= 0 || len >= BPF_MAXINSNS) + return -EINVAL; + + if (new_prog) { + addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + } + +do_pass: + new_insn = new_prog; + fp = prog; + + if (new_insn) { + new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + new_insn->a_reg = CTX_REG; + new_insn->x_reg = ARG1_REG; + } + new_insn++; + + for (i = 0; i < len; fp++, i++) { + struct sock_filter_int tmp_insns[6] = { }; + struct sock_filter_int *insn = tmp_insns; + + if (addrs) + addrs[i] = new_insn - new_prog; + + switch (fp->code) { + /* All arithmetic insns and skb loads map as-is. */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_NEG: + case BPF_LD | BPF_ABS | BPF_W: + case BPF_LD | BPF_ABS | BPF_H: + case BPF_LD | BPF_ABS | BPF_B: + case BPF_LD | BPF_IND | BPF_W: + case BPF_LD | BPF_IND | BPF_H: + case BPF_LD | BPF_IND | BPF_B: + /* Check for overloaded BPF extension and + * directly convert it if found, otherwise + * just move on with mapping. + */ + if (BPF_CLASS(fp->code) == BPF_LD && + BPF_MODE(fp->code) == BPF_ABS && + convert_bpf_extensions(fp, &insn)) + break; + + insn->code = fp->code; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + insn->imm = fp->k; + break; + + /* Jump opcodes map as-is, but offsets need adjustment. */ + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; +#define EMIT_JMP \ + do { \ + if (target >= len || target < 0) \ + goto err; \ + insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + /* Adjust pc relative offset for 2nd or 3rd insn. */ \ + insn->off -= insn - tmp_insns; \ + } while (0) + + EMIT_JMP; + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { + /* BPF immediates are signed, zero extend + * immediate into tmp register and use it + * in compare insn. + */ + insn->code = BPF_ALU | BPF_MOV | BPF_K; + insn->a_reg = TMP_REG; + insn->imm = fp->k; + insn++; + + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + bpf_src = BPF_X; + } else { + insn->a_reg = A_REG; + insn->x_reg = X_REG; + insn->imm = fp->k; + bpf_src = BPF_SRC(fp->code); + } + + /* Common case where 'jump_false' is next insn. */ + if (fp->jf == 0) { + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + target = i + fp->jt + 1; + EMIT_JMP; + break; + } + + /* Convert JEQ into JNE when 'jump_true' is next insn. */ + if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { + insn->code = BPF_JMP | BPF_JNE | bpf_src; + target = i + fp->jf + 1; + EMIT_JMP; + break; + } + + /* Other jumps are mapped into two insns: Jxx and JA. */ + target = i + fp->jt + 1; + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + EMIT_JMP; + insn++; + + insn->code = BPF_JMP | BPF_JA; + target = i + fp->jf + 1; + EMIT_JMP; + break; + + /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ + case BPF_LDX | BPF_MSH | BPF_B: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = TMP_REG; + insn->x_reg = A_REG; + insn++; + + insn->code = BPF_LD | BPF_ABS | BPF_B; + insn->a_reg = A_REG; + insn->imm = fp->k; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = 0xf; + insn++; + + insn->code = BPF_ALU | BPF_LSH | BPF_K; + insn->a_reg = A_REG; + insn->imm = 2; + insn++; + + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = X_REG; + insn->x_reg = A_REG; + insn++; + + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + break; + + /* RET_K, RET_A are remaped into 2 insns. */ + case BPF_RET | BPF_A: + case BPF_RET | BPF_K: + insn->code = BPF_ALU | BPF_MOV | + (BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X); + insn->a_reg = 0; + insn->x_reg = A_REG; + insn->imm = fp->k; + insn++; + + insn->code = BPF_JMP | BPF_EXIT; + break; + + /* Store to stack. */ + case BPF_ST: + case BPF_STX: + insn->code = BPF_STX | BPF_MEM | BPF_W; + insn->a_reg = FP_REG; + insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; + insn->off = -(BPF_MEMWORDS - fp->k) * 4; + break; + + /* Load from stack. */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->x_reg = FP_REG; + insn->off = -(BPF_MEMWORDS - fp->k) * 4; + break; + + /* A = K or X = K */ + case BPF_LD | BPF_IMM: + case BPF_LDX | BPF_IMM: + insn->code = BPF_ALU | BPF_MOV | BPF_K; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->imm = fp->k; + break; + + /* X = A */ + case BPF_MISC | BPF_TAX: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = X_REG; + insn->x_reg = A_REG; + break; + + /* A = X */ + case BPF_MISC | BPF_TXA: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + break; + + /* A = skb->len or X = skb->len */ + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LDX | BPF_W | BPF_LEN: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, len); + break; + + /* access seccomp_data fields */ + case BPF_LDX | BPF_ABS | BPF_W: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = fp->k; + break; + default: - WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", - fentry->code, fentry->jt, - fentry->jf, fentry->k); - return 0; + goto err; } + + insn++; + if (new_prog) + memcpy(new_insn, tmp_insns, + sizeof(*insn) * (insn - tmp_insns)); + + new_insn += insn - tmp_insns; } + if (!new_prog) { + /* Only calculating new length. */ + *new_len = new_insn - new_prog; + return 0; + } + + pass++; + if (new_flen != new_insn - new_prog) { + new_flen = new_insn - new_prog; + if (pass > 2) + goto err; + + goto do_pass; + } + + kfree(addrs); + BUG_ON(*new_len != new_flen); return 0; +err: + kfree(addrs); + return -EINVAL; } -EXPORT_SYMBOL(sk_run_filter); -/* - * Security : +/* Security: + * * A BPF program is able to use 16 cells of memory to store intermediate - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). + * * As we dont want to clear mem[] array for each packet going through * sk_run_filter(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure @@ -629,30 +1381,197 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(sk_chk_filter); +static int sk_store_orig_filter(struct sk_filter *fp, + const struct sock_fprog *fprog) +{ + unsigned int fsize = sk_filter_proglen(fprog); + struct sock_fprog_kern *fkprog; + + fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); + if (!fp->orig_prog) + return -ENOMEM; + + fkprog = fp->orig_prog; + fkprog->len = fprog->len; + fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); + if (!fkprog->filter) { + kfree(fp->orig_prog); + return -ENOMEM; + } + + return 0; +} + +static void sk_release_orig_filter(struct sk_filter *fp) +{ + struct sock_fprog_kern *fprog = fp->orig_prog; + + if (fprog) { + kfree(fprog->filter); + kfree(fprog); + } +} + /** * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -void sk_filter_release_rcu(struct rcu_head *rcu) +static void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + sk_release_orig_filter(fp); bpf_jit_free(fp); } -EXPORT_SYMBOL(sk_filter_release_rcu); -static int __sk_prepare_filter(struct sk_filter *fp) +/** + * sk_filter_release - release a socket filter + * @fp: filter to remove + * + * Remove a filter from a socket and release its resources. + */ +static void sk_filter_release(struct sk_filter *fp) +{ + if (atomic_dec_and_test(&fp->refcnt)) + call_rcu(&fp->rcu, sk_filter_release_rcu); +} + +void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) +{ + atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); + sk_filter_release(fp); +} + +void sk_filter_charge(struct sock *sk, struct sk_filter *fp) +{ + atomic_inc(&fp->refcnt); + atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); +} + +static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, + struct sock *sk, + unsigned int len) +{ + struct sk_filter *fp_new; + + if (sk == NULL) + return krealloc(fp, len, GFP_KERNEL); + + fp_new = sock_kmalloc(sk, len, GFP_KERNEL); + if (fp_new) { + memcpy(fp_new, fp, sizeof(struct sk_filter)); + /* As we're kepping orig_prog in fp_new along, + * we need to make sure we're not evicting it + * from the old fp. + */ + fp->orig_prog = NULL; + sk_filter_uncharge(sk, fp); + } + + return fp_new; +} + +static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, + struct sock *sk) +{ + struct sock_filter *old_prog; + struct sk_filter *old_fp; + int i, err, new_len, old_len = fp->len; + + /* We are free to overwrite insns et al right here as it + * won't be used at this point in time anymore internally + * after the migration to the internal BPF instruction + * representation. + */ + BUILD_BUG_ON(sizeof(struct sock_filter) != + sizeof(struct sock_filter_int)); + + /* For now, we need to unfiddle BPF_S_* identifiers in place. + * This can sooner or later on be subject to removal, e.g. when + * JITs have been converted. + */ + for (i = 0; i < fp->len; i++) + sk_decode_filter(&fp->insns[i], &fp->insns[i]); + + /* Conversion cannot happen on overlapping memory areas, + * so we need to keep the user BPF around until the 2nd + * pass. At this time, the user BPF is stored in fp->insns. + */ + old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), + GFP_KERNEL); + if (!old_prog) { + err = -ENOMEM; + goto out_err; + } + + /* 1st pass: calculate the new program length. */ + err = sk_convert_filter(old_prog, old_len, NULL, &new_len); + if (err) + goto out_err_free; + + /* Expand fp for appending the new filter representation. */ + old_fp = fp; + fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len)); + if (!fp) { + /* The old_fp is still around in case we couldn't + * allocate new memory, so uncharge on that one. + */ + fp = old_fp; + err = -ENOMEM; + goto out_err_free; + } + + fp->bpf_func = sk_run_filter_int_skb; + fp->len = new_len; + + /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ + err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + if (err) + /* 2nd sk_convert_filter() can fail only if it fails + * to allocate memory, remapping must succeed. Note, + * that at this time old_fp has already been released + * by __sk_migrate_realloc(). + */ + goto out_err_free; + + kfree(old_prog); + return fp; + +out_err_free: + kfree(old_prog); +out_err: + /* Rollback filter setup. */ + if (sk != NULL) + sk_filter_uncharge(sk, fp); + else + kfree(fp); + return ERR_PTR(err); +} + +static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, + struct sock *sk) { int err; - fp->bpf_func = sk_run_filter; + fp->bpf_func = NULL; + fp->jited = 0; err = sk_chk_filter(fp->insns, fp->len); if (err) - return err; + return ERR_PTR(err); + /* Probe if we can JIT compile the filter and if so, do + * the compilation of the filter. + */ bpf_jit_compile(fp); - return 0; + + /* JIT compiler couldn't process this filter, so do the + * internal BPF translation for the optimized interpreter. + */ + if (!fp->jited) + fp = __sk_migrate_filter(fp, sk); + + return fp; } /** @@ -668,9 +1587,8 @@ static int __sk_prepare_filter(struct sk_filter *fp) int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog *fprog) { + unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; - unsigned int fsize = sizeof(struct sock_filter) * fprog->len; - int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -679,20 +1597,26 @@ int sk_unattached_filter_create(struct sk_filter **pfp, fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; + memcpy(fp->insns, fprog->filter, fsize); atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; - err = __sk_prepare_filter(fp); - if (err) - goto free_mem; + /* __sk_prepare_filter() already takes care of uncharging + * memory in case something goes wrong. + */ + fp = __sk_prepare_filter(fp, NULL); + if (IS_ERR(fp)) + return PTR_ERR(fp); *pfp = fp; return 0; -free_mem: - kfree(fp); - return err; } EXPORT_SYMBOL_GPL(sk_unattached_filter_create); @@ -715,7 +1639,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; - unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int fsize = sk_filter_proglen(fprog); unsigned int sk_fsize = sk_filter_size(fprog->len); int err; @@ -729,6 +1653,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; + if (copy_from_user(fp->insns, fprog->filter, fsize)) { sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; @@ -737,18 +1662,26 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; - err = __sk_prepare_filter(fp); + err = sk_store_orig_filter(fp, fprog); if (err) { sk_filter_uncharge(sk, fp); - return err; + return -ENOMEM; } + /* __sk_prepare_filter() already takes care of uncharging + * memory in case something goes wrong. + */ + fp = __sk_prepare_filter(fp, sk); + if (IS_ERR(fp)) + return PTR_ERR(fp); + old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) sk_filter_uncharge(sk, old_fp); + return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -768,6 +1701,7 @@ int sk_detach_filter(struct sock *sk) sk_filter_uncharge(sk, filter); ret = 0; } + return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); @@ -809,7 +1743,6 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, @@ -850,34 +1783,41 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) to->k = filt->k; } -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, + unsigned int len) { + struct sock_fprog_kern *fprog; struct sk_filter *filter; - int i, ret; + int ret = 0; lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - ret = 0; + sock_owned_by_user(sk)); if (!filter) goto out; - ret = filter->len; + + /* We're copying the filter that has been originally attached, + * so no conversion/decode needed anymore. + */ + fprog = filter->orig_prog; + + ret = fprog->len; if (!len) + /* User space only enquires number of filter blocks. */ goto out; + ret = -EINVAL; - if (len < filter->len) + if (len < fprog->len) goto out; ret = -EFAULT; - for (i = 0; i < filter->len; i++) { - struct sock_filter fb; - - sk_decode_filter(&filter->insns[i], &fb); - if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) - goto out; - } + if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) + goto out; - ret = filter->len; + /* Instead of bytes, the API requests to return the number + * of filter blocks. + */ + ret = fprog->len; out: release_sock(sk); return ret; diff --git a/net/core/flow.c b/net/core/flow.c index dfa602ceb8c..a0348fde1fd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -24,6 +24,7 @@ #include <net/flow.h> #include <linux/atomic.h> #include <linux/security.h> +#include <net/net_namespace.h> struct flow_cache_entry { union { @@ -38,37 +39,14 @@ struct flow_cache_entry { struct flow_cache_object *object; }; -struct flow_cache_percpu { - struct hlist_head *hash_table; - int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - struct flow_flush_info { struct flow_cache *cache; atomic_t cpuleft; struct completion completion; }; -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; - int low_watermark; - int high_watermark; - struct timer_list rnd_timer; -}; - -atomic_t flow_cache_genid = ATOMIC_INIT(0); -EXPORT_SYMBOL(flow_cache_genid); -static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep __read_mostly; -static DEFINE_SPINLOCK(flow_cache_gc_lock); -static LIST_HEAD(flow_cache_gc_list); - #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) @@ -84,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&fc->rnd_timer); } -static int flow_entry_valid(struct flow_cache_entry *fle) +static int flow_entry_valid(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { - if (atomic_read(&flow_cache_genid) != fle->genid) + if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) return 0; if (fle->object && !fle->object->ops->check(fle->object)) return 0; return 1; } -static void flow_entry_kill(struct flow_cache_entry *fle) +static void flow_entry_kill(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { if (fle->object) fle->object->ops->delete(fle->object); @@ -104,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work) { struct list_head gc_list; struct flow_cache_entry *fce, *n; + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail_init(&flow_cache_gc_list, &gc_list); - spin_unlock_bh(&flow_cache_gc_lock); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) - flow_entry_kill(fce); + flow_entry_kill(fce, xfrm); } -static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list) + int deleted, struct list_head *gc_list, + struct netns_xfrm *xfrm) { if (deleted) { fcp->hash_count -= deleted; - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail(gc_list, &flow_cache_gc_list); - spin_unlock_bh(&flow_cache_gc_lock); - schedule_work(&flow_cache_gc_work); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); + schedule_work(&xfrm->flow_cache_gc_work); } } @@ -135,6 +117,8 @@ static void __flow_cache_shrink(struct flow_cache *fc, struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; @@ -142,7 +126,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && - flow_entry_valid(fle)) { + flow_entry_valid(fle, xfrm)) { saved++; } else { deleted++; @@ -152,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); } static void flow_cache_shrink(struct flow_cache *fc, @@ -208,7 +192,7 @@ struct flow_cache_object * flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { - struct flow_cache *fc = &flow_cache_global; + struct flow_cache *fc = &net->xfrm.flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; struct flow_cache_object *flo; @@ -258,7 +242,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; } - } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { + } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { flo = fle->object; if (!flo) goto ret_object; @@ -279,7 +263,7 @@ nocache: } flo = resolver(net, key, family, dir, flo, ctx); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); + fle->genid = atomic_read(&net->xfrm.flow_cache_genid); if (!IS_ERR(flo)) fle->object = flo; else @@ -303,12 +287,14 @@ static void flow_cache_flush_tasklet(unsigned long data) struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle)) + if (flow_entry_valid(fle, xfrm)) continue; deleted++; @@ -317,7 +303,7 @@ static void flow_cache_flush_tasklet(unsigned long data) } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); if (atomic_dec_and_test(&info->cpuleft)) complete(&info->completion); @@ -351,10 +337,9 @@ static void flow_cache_flush_per_cpu(void *data) tasklet_schedule(tasklet); } -void flow_cache_flush(void) +void flow_cache_flush(struct net *net) { struct flow_flush_info info; - static DEFINE_MUTEX(flow_flush_sem); cpumask_var_t mask; int i, self; @@ -365,8 +350,8 @@ void flow_cache_flush(void) /* Don't want cpus going down or up during this. */ get_online_cpus(); - mutex_lock(&flow_flush_sem); - info.cache = &flow_cache_global; + mutex_lock(&net->xfrm.flow_flush_sem); + info.cache = &net->xfrm.flow_cache_global; for_each_online_cpu(i) if (!flow_cache_percpu_empty(info.cache, i)) cpumask_set_cpu(i, mask); @@ -386,21 +371,23 @@ void flow_cache_flush(void) wait_for_completion(&info.completion); done: - mutex_unlock(&flow_flush_sem); + mutex_unlock(&net->xfrm.flow_flush_sem); put_online_cpus(); free_cpumask_var(mask); } static void flow_cache_flush_task(struct work_struct *work) { - flow_cache_flush(); -} + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); + struct net *net = container_of(xfrm, struct net, xfrm); -static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); + flow_cache_flush(net); +} -void flow_cache_flush_deferred(void) +void flow_cache_flush_deferred(struct net *net) { - schedule_work(&flow_cache_flush_work); + schedule_work(&net->xfrm.flow_cache_flush_work); } static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) @@ -425,7 +412,8 @@ static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); + struct flow_cache *fc = container_of(nfb, struct flow_cache, + hotcpu_notifier); int res, cpu = (unsigned long) hcpu; struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); @@ -444,9 +432,20 @@ static int flow_cache_cpu(struct notifier_block *nfb, return NOTIFY_OK; } -static int __init flow_cache_init(struct flow_cache *fc) +int flow_cache_init(struct net *net) { int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; + + if (!flow_cachep) + flow_cachep = kmem_cache_create("flow_cache", + sizeof(struct flow_cache_entry), + 0, SLAB_PANIC, NULL); + spin_lock_init(&net->xfrm.flow_cache_gc_lock); + INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); + INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); + INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); + mutex_init(&net->xfrm.flow_flush_sem); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); @@ -456,6 +455,8 @@ static int __init flow_cache_init(struct flow_cache *fc) if (!fc->percpu) return -ENOMEM; + cpu_notifier_register_begin(); + for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) goto err; @@ -463,7 +464,9 @@ static int __init flow_cache_init(struct flow_cache *fc) fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, }; - register_hotcpu_notifier(&fc->hotcpu_notifier); + __register_hotcpu_notifier(&fc->hotcpu_notifier); + + cpu_notifier_register_done(); setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc); @@ -479,19 +482,30 @@ err: fcp->hash_table = NULL; } + cpu_notifier_register_done(); + free_percpu(fc->percpu); fc->percpu = NULL; return -ENOMEM; } +EXPORT_SYMBOL(flow_cache_init); -static int __init flow_cache_init_global(void) +void flow_cache_fini(struct net *net) { - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); + int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; - return flow_cache_init(&flow_cache_global); -} + del_timer_sync(&fc->rnd_timer); + unregister_hotcpu_notifier(&fc->hotcpu_notifier); + + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } -module_init(flow_cache_init_global); + free_percpu(fc->percpu); + fc->percpu = NULL; +} +EXPORT_SYMBOL(flow_cache_fini); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 87577d44755..107ed12a532 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) again: switch (proto) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; ip: @@ -77,7 +77,7 @@ ip: iph_to_flow_copy_addrs(flow, iph); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; ipv6: @@ -91,8 +91,8 @@ ipv6: nhoff += sizeof(struct ipv6hdr); break; } - case __constant_htons(ETH_P_8021AD): - case __constant_htons(ETH_P_8021Q): { + case htons(ETH_P_8021AD): + case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; @@ -104,7 +104,7 @@ ipv6: nhoff += sizeof(*vlan); goto again; } - case __constant_htons(ETH_P_PPP_SES): { + case htons(ETH_P_PPP_SES): { struct { struct pppoe_hdr hdr; __be16 proto; @@ -115,9 +115,9 @@ ipv6: proto = hdr->proto; nhoff += PPPOE_SES_HLEN; switch (proto) { - case __constant_htons(PPP_IP): + case htons(PPP_IP): goto ip; - case __constant_htons(PPP_IPV6): + case htons(PPP_IPV6): goto ipv6; default: return false; @@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a) /* * __skb_get_hash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * and src/dst port numbers. Sets hash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash(struct sk_buff *skb) @@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb) return; if (keys.ports) - skb->l4_rxhash = 1; + skb->l4_hash = 1; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || @@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb) if (!hash) hash = 1; - skb->rxhash = hash; + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } -static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) -{ - if (unlikely(queue_index >= dev->real_num_tx_queues)) { - net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - return 0; - } - return queue_index; -} - static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -355,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ - skb->rxhash; + skb->hash; hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; @@ -372,7 +361,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) #endif } -u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); @@ -392,7 +381,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) return queue_index; } -EXPORT_SYMBOL(__netdev_pick_tx); struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, @@ -403,13 +391,13 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, - accel_priv); + queue_index = ops->ndo_select_queue(dev, skb, accel_priv, + __netdev_pick_tx); else queue_index = __netdev_pick_tx(dev, skb); if (!accel_priv) - queue_index = dev_cap_txqueue(dev, queue_index); + queue_index = netdev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b9e9e0d3867..8f8a96ef9f3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -766,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) - goto out; - /* * periodically recompute ReachableTime from random function */ @@ -781,6 +778,9 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; @@ -836,10 +836,10 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE) ? - NEIGH_VAR(p, UCAST_PROBES) : - NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + - NEIGH_VAR(p, MCAST_PROBES); + int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); + if (!(n->nud_state & NUD_PROBE)) + max_probes += NEIGH_VAR(p, MCAST_PROBES); + return max_probes; } static void neigh_invalidate(struct neighbour *neigh) @@ -945,6 +945,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_FAILED; notify = 1; neigh_invalidate(neigh); + goto out; } if (neigh->nud_state & NUD_IN_TIMER) { @@ -3046,7 +3047,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (!t) goto err; - for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) { + for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { t->neigh_vars[i].data += (long) p; t->neigh_vars[i].extra1 = dev; t->neigh_vars[i].extra2 = p; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 93886246a0b..1cac29ebb05 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RO(dev_id, fmt_hex); +NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); NETDEVICE_SHOW_RO(iflink, fmt_dec); @@ -252,6 +253,16 @@ static ssize_t operstate_show(struct device *dev, } static DEVICE_ATTR_RO(operstate); +static ssize_t carrier_changes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + return sprintf(buf, fmt_dec, + atomic_read(&netdev->carrier_changes)); +} +static DEVICE_ATTR_RO(carrier_changes); + /* read-write attributes */ static int change_mtu(struct net_device *net, unsigned long new_mtu) @@ -373,6 +384,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, &dev_attr_dev_id.attr, + &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, &dev_attr_addr_assign_type.attr, @@ -384,6 +396,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_duplex.attr, &dev_attr_dormant.attr, &dev_attr_operstate.attr, + &dev_attr_carrier_changes.attr, &dev_attr_ifalias.attr, &dev_attr_carrier.attr, &dev_attr_mtu.attr, @@ -789,7 +802,7 @@ exit: kobject_put(kobj); return error; } -#endif /* CONFIG_SYFS */ +#endif /* CONFIG_SYSFS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) @@ -996,15 +1009,12 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 719efd54166..22931e1b99b 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -23,7 +23,7 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { - return css_cls_state(task_css(p, net_cls_subsys_id)); + return css_cls_state(task_css(p, net_cls_cgrp_id)); } EXPORT_SYMBOL_GPL(task_cls_state); @@ -73,7 +73,7 @@ static void cgrp_attach(struct cgroup_subsys_state *css, void *v = (void *)(unsigned long)cs->classid; struct task_struct *p; - cgroup_taskset_for_each(p, css, tset) { + cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_classid, v); task_unlock(p); @@ -102,19 +102,10 @@ static struct cftype ss_files[] = { { } /* terminate */ }; -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", +struct cgroup_subsys net_cls_cgrp_subsys = { .css_alloc = cgrp_css_alloc, .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = cgrp_attach, - .subsys_id = net_cls_subsys_id, .base_cftypes = ss_files, - .module = THIS_MODULE, }; - -static int __init init_netclassid_cgroup(void) -{ - return cgroup_load_subsys(&net_cls_subsys); -} -__initcall(init_netclassid_cgroup); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c03f3dec476..e33937fb32a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,13 +46,9 @@ static struct sk_buff_head skb_pool; -static atomic_t trapped; - DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 -#define NETPOLL_RX_ENABLED 1 -#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ @@ -61,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu); MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; @@ -74,6 +69,37 @@ module_param(carrier_timeout, uint, 0644); #define np_notice(np, fmt, ...) \ pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) +static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int status = NETDEV_TX_OK; + netdev_features_t features; + + features = netif_skb_features(skb); + + if (vlan_tx_tag_present(skb) && + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code that calls this + * function to try and operate on a NULL skb. + */ + goto out; + } + skb->vlan_tci = 0; + } + + status = ops->ndo_start_xmit(skb, dev); + if (status == NETDEV_TX_OK) + txq_trans_update(txq); + +out: + return status; +} + static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = @@ -83,51 +109,31 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { - __kfree_skb(skb); + kfree_skb(skb); continue; } txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); local_irq_save(flags); - __netif_tx_lock(txq, smp_processor_id()); + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { + netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } -static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) -{ - __wsum psum; - - if (uh->check == 0 || skb_csum_unnecessary(skb)) - return 0; - - psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} - /* * Check whether delayed processing was scheduled for our NIC. If so, * we attempt to grab the poll lock and use ->poll() to pump the card. @@ -138,14 +144,8 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * trylock here and interrupts are already disabled in the softirq * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. - * - * In cases where there is bi-directional communications, reading only - * one message at a time can lead to packets being dropped by the - * network adapter, forcing superfluous retries and possibly timeouts. - * Thus, we set our budget to greater than 1. */ -static int poll_one_napi(struct netpoll_info *npinfo, - struct napi_struct *napi, int budget) +static int poll_one_napi(struct napi_struct *napi, int budget) { int work; @@ -156,52 +156,35 @@ static int poll_one_napi(struct netpoll_info *npinfo, if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; return budget - work; } -static void poll_napi(struct net_device *dev) +static void poll_napi(struct net_device *dev, int budget) { struct napi_struct *napi; - int budget = 16; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), - napi, budget); + budget = poll_one_napi(napi, budget); spin_unlock(&napi->poll_lock); - - if (!budget) - break; } } } -static void service_neigh_queue(struct netpoll_info *npi) -{ - if (npi) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&npi->neigh_tx))) - netpoll_neigh_reply(skb, npi); - } -} - static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -224,31 +207,14 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev); + poll_napi(dev, budget); up(&ni->dev_lock); - if (dev->flags & IFF_SLAVE) { - if (ni) { - struct net_device *bond_dev; - struct sk_buff *skb; - struct netpoll_info *bond_ni; - - bond_dev = netdev_master_upper_dev_get_rcu(dev); - bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->neigh_tx))) { - skb->dev = bond_dev; - skb_queue_tail(&bond_ni->neigh_tx, skb); - } - } - } - - service_neigh_queue(ni); - zap_completion_queue(); } -void netpoll_rx_disable(struct net_device *dev) +void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -259,9 +225,9 @@ void netpoll_rx_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_rx_disable); +EXPORT_SYMBOL(netpoll_poll_disable); -void netpoll_rx_enable(struct net_device *dev) +void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); @@ -270,7 +236,7 @@ void netpoll_rx_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_rx_enable); +EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { @@ -304,7 +270,7 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if (skb->destructor) { + if (!skb_irq_freeable(skb)) { atomic_inc(&skb->users); dev_kfree_skb_any(skb); /* put this one back */ } else { @@ -359,7 +325,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, { int status = NETDEV_TX_BUSY; unsigned long tries; - const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; @@ -367,7 +332,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { - __kfree_skb(skb); + dev_kfree_skb_irq(skb); return; } @@ -380,29 +345,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (__netif_tx_trylock(txq)) { - if (!netif_xmit_stopped(txq)) { - if (vlan_tx_tag_present(skb) && - !vlan_hw_offload_capable(netif_skb_features(skb), - skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) { - /* This is actually a packet drop, but we - * don't want the code at the end of this - * function to try and re-queue a NULL skb. - */ - status = NETDEV_TX_OK; - goto unlock_txq; - } - skb->vlan_tci = 0; - } - - status = ops->ndo_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); - } - unlock_txq: - __netif_tx_unlock(txq); + if (HARD_TX_TRYLOCK(dev, txq)) { + if (!netif_xmit_stopped(txq)) + status = netpoll_start_xmit(skb, dev, txq); + + HARD_TX_UNLOCK(dev, txq); if (status == NETDEV_TX_OK) break; @@ -417,7 +364,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", - dev->name, ops->ndo_start_xmit); + dev->name, dev->netdev_ops->ndo_start_xmit); } @@ -529,384 +476,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int size, type = ARPOP_REPLY; - __be32 sip, tip; - unsigned char *sha; - struct sk_buff *send_skb; - struct netpoll *np, *tmp; - unsigned long flags; - int hlen, tlen; - int hits = 0, proto; - - if (list_empty(&npinfo->rx_np)) - return; - - /* Before checking the packet, we do some early - inspection whether this is interesting at all */ - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->dev == skb->dev) - hits++; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - /* No netpoll struct is using this dev */ - if (!hits) - return; - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_ARP) { - struct arphdr *arp; - unsigned char *arp_ptr; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); - - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; - - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; - - size = arp_hdr_len(skb->dev); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip.ip) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); - - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } else if( proto == ETH_P_IPV6) { -#if IS_ENABLED(CONFIG_IPV6) - struct nd_msg *msg; - u8 *lladdr = NULL; - struct ipv6hdr *hdr; - struct icmp6hdr *icmp6h; - const struct in6_addr *saddr; - const struct in6_addr *daddr; - struct inet6_dev *in6_dev = NULL; - struct in6_addr *target; - - in6_dev = in6_dev_get(skb->dev); - if (!in6_dev || !in6_dev->cnf.accept_ra) - return; - - if (!pskb_may_pull(skb, skb->len)) - return; - - msg = (struct nd_msg *)skb_transport_header(skb); - - __skb_push(skb, skb->data - skb_transport_header(skb)); - - if (ipv6_hdr(skb)->hop_limit != 255) - return; - if (msg->icmph.icmp6_code != 0) - return; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - - size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - send_skb->protocol = htons(ETH_P_IPV6); - send_skb->dev = skb->dev; - - skb_reset_network_header(send_skb); - hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); - *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); - hdr->nexthdr = IPPROTO_ICMPV6; - hdr->hop_limit = 255; - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; - icmp6h->icmp6_router = 0; - icmp6h->icmp6_solicited = 1; - - target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); - *target = msg->target; - icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, - IPPROTO_ICMPV6, - csum_partial(icmp6h, - size, 0)); - - if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, - lladdr, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address, we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); -#endif - } -} - -static bool pkt_is_ns(struct sk_buff *skb) -{ - struct nd_msg *msg; - struct ipv6hdr *hdr; - - if (skb->protocol != htons(ETH_P_ARP)) - return false; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - return false; - - msg = (struct nd_msg *)skb_transport_header(skb); - __skb_push(skb, skb->data - skb_transport_header(skb)); - hdr = ipv6_hdr(skb); - - if (hdr->nexthdr != IPPROTO_ICMPV6) - return false; - if (hdr->hop_limit != 255) - return false; - if (msg->icmph.icmp6_code != 0) - return false; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return false; - - return true; -} - -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int proto, len, ulen, data_len; - int hits = 0, offset; - const struct iphdr *iph; - struct udphdr *uh; - struct netpoll *np, *tmp; - uint16_t source; - - if (list_empty(&npinfo->rx_np)) - goto out; - - if (skb->dev->type != ARPHRD_ETHER) - goto out; - - /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); - if (unlikely(!skb)) - goto out; - } - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP && proto != ETH_P_IPV6) - goto out; - if (skb->pkt_type == PACKET_OTHERHOST) - goto out; - if (skb_shared(skb)) - goto out; - - if (proto == ETH_P_IP) { - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; - - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; - - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip.ip && np->local_ip.ip != iph->daddr) - continue; - if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } - } else { -#if IS_ENABLED(CONFIG_IPV6) - const struct ipv6hdr *ip6h; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - ip6h = (struct ipv6hdr *)skb->data; - if (ip6h->version != 6) - goto out; - len = ntohs(ip6h->payload_len); - if (!len) - goto out; - if (len + sizeof(struct ipv6hdr) > skb->len) - goto out; - if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) - goto out; - ip6h = ipv6_hdr(skb); - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - uh = udp_hdr(skb); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - if (ulen != skb->len) - goto out; - if (udp6_csum_init(skb, uh, IPPROTO_UDP)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) - continue; - if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } -#endif - } - - if (!hits) - goto out; - - kfree_skb(skb); - return 1; - -out: - if (atomic_read(&trapped)) { - kfree_skb(skb); - return 1; - } - - return 0; -} - void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -948,6 +517,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; int ipv6; + bool ipversion_set = false; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -960,6 +530,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur++; if (*cur != '/') { + ipversion_set = true; if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; @@ -1002,7 +573,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; - else if (np->ipv6 != (bool)ipv6) + else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; @@ -1024,11 +595,10 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; - unsigned long flags; int err; np->dev = ndev; @@ -1044,18 +614,13 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), gfp); + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } - npinfo->rx_flags = 0; - INIT_LIST_HEAD(&npinfo->rx_np); - - spin_lock_init(&npinfo->rx_lock); sema_init(&npinfo->dev_lock, 1); - skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1063,7 +628,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); + err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto free_npinfo; } @@ -1074,13 +639,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= NETPOLL_RX_ENABLED; - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } - /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1202,7 +760,7 @@ int netpoll_setup(struct netpoll *np) /* fill up the skb queue */ refill_skbs(); - err = __netpoll_setup(np, ndev, GFP_KERNEL); + err = __netpoll_setup(np, ndev); if (err) goto put; @@ -1229,7 +787,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1245,7 +802,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; - unsigned long flags; /* rtnl_dereference would be preferable here but * rcu_cleanup_netpoll path can put us in here safely without @@ -1255,14 +811,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - if (list_empty(&npinfo->rx_np)) - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } - synchronize_srcu(&netpoll_srcu); if (atomic_dec_and_test(&npinfo->refcnt)) { @@ -1272,7 +820,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } @@ -1306,18 +854,3 @@ out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); - -int netpoll_trap(void) -{ - return atomic_read(&trapped); -} -EXPORT_SYMBOL(netpoll_trap); - -void netpoll_set_trap(int trap) -{ - if (trap) - atomic_inc(&trapped); - else - atomic_dec(&trapped); -} -EXPORT_SYMBOL(netpoll_set_trap); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9043caedcd0..3825f669147 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -186,7 +186,7 @@ static int read_priomap(struct seq_file *sf, void *v) } static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { char devname[IFNAMSIZ + 1]; struct net_device *dev; @@ -224,7 +224,7 @@ static void net_prio_attach(struct cgroup_subsys_state *css, struct task_struct *p; void *v = (void *)(unsigned long)css->cgroup->id; - cgroup_taskset_for_each(p, css, tset) { + cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); @@ -244,15 +244,12 @@ static struct cftype ss_files[] = { { } /* terminate */ }; -struct cgroup_subsys net_prio_subsys = { - .name = "net_prio", +struct cgroup_subsys net_prio_cgrp_subsys = { .css_alloc = cgrp_css_alloc, .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, - .subsys_id = net_prio_subsys_id, .base_cftypes = ss_files, - .module = THIS_MODULE, }; static int netprio_device_event(struct notifier_block *unused, @@ -283,37 +280,9 @@ static struct notifier_block netprio_device_notifier = { static int __init init_cgroup_netprio(void) { - int ret; - - ret = cgroup_load_subsys(&net_prio_subsys); - if (ret) - goto out; - register_netdevice_notifier(&netprio_device_notifier); - -out: - return ret; -} - -static void __exit exit_cgroup_netprio(void) -{ - struct netprio_map *old; - struct net_device *dev; - - unregister_netdevice_notifier(&netprio_device_notifier); - - cgroup_unload_subsys(&net_prio_subsys); - - rtnl_lock(); - for_each_netdev(&init_net, dev) { - old = rtnl_dereference(dev->priomap); - RCU_INIT_POINTER(dev->priomap, NULL); - if (old) - kfree_rcu(old, rcu); - } - rtnl_unlock(); + return 0; } -module_init(init_cgroup_netprio); -module_exit(exit_cgroup_netprio); +subsys_initcall(init_cgroup_netprio); MODULE_LICENSE("GPL v2"); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fdac61cac1b..0304f981f7f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -476,23 +476,22 @@ static int pgctrl_show(struct seq_file *seq, void *v) static ssize_t pgctrl_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err = 0; char data[128]; struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); - if (!capable(CAP_NET_ADMIN)) { - err = -EPERM; - goto out; - } + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (count == 0) + return -EINVAL; if (count > sizeof(data)) count = sizeof(data); - if (copy_from_user(data, buf, count)) { - err = -EFAULT; - goto out; - } - data[count - 1] = 0; /* Make string */ + if (copy_from_user(data, buf, count)) + return -EFAULT; + + data[count - 1] = 0; /* Strip trailing '\n' and terminate string */ if (!strcmp(data, "stop")) pktgen_stop_all_threads_ifs(pn); @@ -506,10 +505,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, else pr_warning("Unknown command: %s\n", data); - err = count; - -out: - return err; + return count; } static int pgctrl_open(struct inode *inode, struct file *file) @@ -1251,7 +1247,13 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, " + "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, " + "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, " +#ifdef CONFIG_XFRM + "IPSEC, " +#endif + "NODE_ALLOC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -3336,9 +3338,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) queue_map = skb_get_queue_mapping(pkt_dev->skb); txq = netdev_get_tx_queue(odev, queue_map); - __netif_tx_lock_bh(txq); + local_bh_disable(); - if (unlikely(netif_xmit_frozen_or_stopped(txq))) { + HARD_TX_LOCK(odev, txq, smp_processor_id()); + + if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; @@ -3372,7 +3376,9 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; } unlock: - __netif_tx_unlock_bh(txq); + HARD_TX_UNLOCK(odev, txq); + + local_bh_enable(); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c new file mode 100644 index 00000000000..eaba0f68f86 --- /dev/null +++ b/net/core/ptp_classifier.c @@ -0,0 +1,141 @@ +/* PTP classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* The below program is the bpf_asm (tools/net/) representation of + * the opcode array in the ptp_filter structure. + * + * For convenience, this can easily be altered and reviewed with + * bpf_asm and bpf_dbg, e.g. `./bpf_asm -c prog` where prog is a + * simple file containing the below program: + * + * ldh [12] ; load ethertype + * + * ; PTP over UDP over IPv4 over Ethernet + * test_ipv4: + * jneq #0x800, test_ipv6 ; ETH_P_IP ? + * ldb [23] ; load proto + * jneq #17, drop_ipv4 ; IPPROTO_UDP ? + * ldh [20] ; load frag offset field + * jset #0x1fff, drop_ipv4 ; don't allow fragments + * ldxb 4*([14]&0xf) ; load IP header len + * ldh [x + 16] ; load UDP dst port + * jneq #319, drop_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 22] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x10 ; PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over Ethernet + * test_ipv6: + * jneq #0x86dd, test_8021q ; ETH_P_IPV6 ? + * ldb [20] ; load proto + * jneq #17, drop_ipv6 ; IPPROTO_UDP ? + * ldh [56] ; load UDP dst port + * jneq #319, drop_ipv6 ; is port PTP_EV_PORT ? + * ldh [62] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x20 ; PTP_CLASS_IPV6 + * ret a ; return PTP class + * drop_ipv6: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over 802.1Q over Ethernet + * test_8021q: + * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? + * ldh [16] ; load inner type + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [18] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [18] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x40 ; PTP_CLASS_V2_VLAN + * ret a ; return PTP class + * + * ; PTP over Ethernet + * test_ieee1588: + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [14] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [14] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x30 ; PTP_CLASS_L2 + * ret a ; return PTP class + * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE + */ + +#include <linux/skbuff.h> +#include <linux/filter.h> +#include <linux/ptp_classify.h> + +static struct sk_filter *ptp_insns __read_mostly; + +unsigned int ptp_classify_raw(const struct sk_buff *skb) +{ + return SK_RUN_FILTER(ptp_insns, skb); +} +EXPORT_SYMBOL_GPL(ptp_classify_raw); + +void __init ptp_classifier_init(void) +{ + static struct sock_filter ptp_filter[] = { + { 0x28, 0, 0, 0x0000000c }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x00000017 }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000014 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x0000000e }, + { 0x48, 0, 0, 0x00000010 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x00000016 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000010 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x000086dd }, + { 0x30, 0, 0, 0x00000014 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x00000038 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x0000003e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000020 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x00008100 }, + { 0x28, 0, 0, 0x00000010 }, + { 0x15, 0, 15, 0x000088f7 }, + { 0x30, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 12, 0x00000000 }, + { 0x28, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000040 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 7, 0x000088f7 }, + { 0x30, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 4, 0x00000000 }, + { 0x28, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000030 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + }; + struct sock_fprog ptp_prog = { + .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, + }; + + BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); +} diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 4425148d2b5..467f326126e 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -221,5 +221,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, out: spin_unlock_bh(&fastopenq->lock); sock_put(lsk); - return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 393b1bc9a61..d4ff41739b0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -374,7 +374,7 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) if (!master_dev) return 0; ops = master_dev->rtnl_link_ops; - if (!ops->get_slave_size) + if (!ops || !ops->get_slave_size) return 0; /* IFLA_INFO_SLAVE_DATA + nested data */ return nla_total_size(sizeof(struct nlattr)) + @@ -822,6 +822,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -970,7 +971,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias))) + nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_u32(skb, IFLA_CARRIER_CHANGES, + atomic_read(&dev->carrier_changes))) goto nla_put_failure; if (1) { @@ -1121,56 +1124,7 @@ nla_put_failure: return -EMSGSIZE; } -static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - int h, s_h; - int idx = 0, s_idx; - struct net_device *dev; - struct hlist_head *head; - struct nlattr *tb[IFLA_MAX+1]; - u32 ext_filter_mask = 0; - - s_h = cb->args[0]; - s_idx = cb->args[1]; - - rcu_read_lock(); - cb->seq = net->dev_base_seq; - - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { - - if (tb[IFLA_EXT_MASK]) - ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); - } - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) - goto out; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - } -out: - rcu_read_unlock(); - cb->args[1] = idx; - cb->args[0] = h; - - return skb->len; -} - -const struct nla_policy ifla_policy[IFLA_MAX+1] = { +static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -1196,8 +1150,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN }, + [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ }; -EXPORT_SYMBOL(ifla_policy); static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_KIND] = { .type = NLA_STRING }, @@ -1235,6 +1189,61 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, }; +static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx = 0, s_idx; + struct net_device *dev; + struct hlist_head *head; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rcu_read_lock(); + cb->seq = net->dev_base_seq; + + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) >= 0) { + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + } + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask) <= 0) + goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +out: + rcu_read_unlock(); + cb->args[1] = idx; + cb->args[0] = h; + + return skb->len; +} + +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) +{ + return nla_parse(tb, IFLA_MAX, head, len, ifla_policy); +} +EXPORT_SYMBOL(rtnl_nla_parse_ifla); + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -1963,16 +1972,21 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) + if (ops->newlink) { err = ops->newlink(net, dev, tb, data); - else + /* Drivers should call free_netdev() in ->destructor + * and unregister it on failure so that device could be + * finally freed in rtnl_unlock. + */ + if (err < 0) + goto out; + } else { err = register_netdevice(dev); - - if (err < 0) { - free_netdev(dev); - goto out; + if (err < 0) { + free_netdev(dev); + goto out; + } } - err = rtnl_configure_link(dev, ifm); if (err < 0) unregister_netdevice(dev); @@ -2116,12 +2130,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u32 pid, u32 seq, - int type, unsigned int flags) + int type, unsigned int flags, + int nlflags) { struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; @@ -2159,7 +2174,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2384,7 +2399,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, portid, seq, - RTM_NEWNEIGH, NTF_SELF); + RTM_NEWNEIGH, NTF_SELF, + NLM_F_MULTI); if (err < 0) return err; skip: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5976ef0846b..1b62343f583 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -707,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mark = old->mark; new->skb_iif = old->skb_iif; __nf_copy(new, old); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - new->nf_trace = old->nf_trace; -#endif #ifdef CONFIG_NET_SCHED new->tc_index = old->tc_index; #ifdef CONFIG_NET_CLS_ACT @@ -2130,25 +2127,31 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. + * + * Return value: + * 0: everything is OK + * -ENOMEM: couldn't orphan frags of @from due to lack of memory + * -EFAULT: skb_copy_bits() found some problem with skb geometry */ -void -skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +int +skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ + int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } + if (len <= skb_tailroom(to)) + return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + if (unlikely(ret)) + return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); @@ -2166,6 +2169,11 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) to->len += len + plen; to->data_len += len + plen; + if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { + skb_tx_error(from); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) break; @@ -2176,6 +2184,8 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) j++; } skb_shinfo(to)->nr_frags = j; + + return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); @@ -2841,81 +2851,85 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment + * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *skb_segment(struct sk_buff *head_skb, + netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *skb_frag = skb_shinfo(skb)->frags; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); + struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; + skb_frag_t *frag = skb_shinfo(head_skb)->frags; + unsigned int mss = skb_shinfo(head_skb)->gso_size; + unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; - unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; unsigned int len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); - int nfrags = skb_shinfo(skb)->nr_frags; + int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + int dummy; - proto = skb_network_protocol(skb); + proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); + __skb_push(head_skb, doffset); + headroom = skb_headroom(head_skb); + pos = skb_headlen(head_skb); do { struct sk_buff *nskb; - skb_frag_t *frag; + skb_frag_t *nskb_frag; int hsize; int size; - len = skb->len - offset; + len = head_skb->len - offset; if (len > mss) len = mss; - hsize = skb_headlen(skb) - offset; + hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags && skb_headlen(fskb) && - (skb_headlen(fskb) == len || sg)) { - BUG_ON(skb_headlen(fskb) > len); + if (!hsize && i >= nfrags && skb_headlen(list_skb) && + (skb_headlen(list_skb) == len || sg)) { + BUG_ON(skb_headlen(list_skb) > len); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; - pos += skb_headlen(fskb); + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; + pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); - size = skb_frag_size(skb_frag); + size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; - skb_frag++; + frag++; } - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; + nskb = skb_clone(list_skb, GFP_ATOMIC); + list_skb = list_skb->next; if (unlikely(!nskb)) goto err; @@ -2936,7 +2950,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC, skb_alloc_rx_flag(skb), + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) @@ -2952,12 +2966,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) segs = nskb; tail = nskb; - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; + __copy_skb_header(nskb, head_skb); + nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); - skb_copy_from_linear_data_offset(skb, -tnl_hlen, + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); @@ -2966,30 +2980,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (!sg) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, + nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); continue; } - frag = skb_shinfo(nskb)->frags; + nskb_frag = skb_shinfo(nskb)->frags; - skb_copy_from_linear_data_offset(skb, offset, + skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(fskb)); + BUG_ON(skb_headlen(list_skb)); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; BUG_ON(!nfrags); - fskb = fskb->next; + list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= @@ -3000,27 +3016,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *frag = *skb_frag; - __skb_frag_ref(frag); - size = skb_frag_size(frag); + if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) + goto err; + + *nskb_frag = *frag; + __skb_frag_ref(nskb_frag); + size = skb_frag_size(nskb_frag); if (pos < offset) { - frag->page_offset += offset - pos; - skb_frag_size_sub(frag, offset - pos); + nskb_frag->page_offset += offset - pos; + skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; - skb_frag++; + frag++; pos += size; } else { - skb_frag_size_sub(frag, pos + size - (offset + len)); + skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } - frag++; + nskb_frag++; } skip_fraglist: @@ -3034,7 +3053,7 @@ perform_csum_check: nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; } - } while ((offset += len) < skb->len); + } while ((offset += len) < head_skb->len); return segs; @@ -3281,6 +3300,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given + * sglist without mark the sg which contain last skb data as the end. + * So the caller can mannipulate sg list as will when padding new data after + * the first call without calling sg_unmark_end to expend sg list. + * + * Scenario to use skb_to_sgvec_nomark: + * 1. sg_init_table + * 2. skb_to_sgvec_nomark(payload1) + * 3. skb_to_sgvec_nomark(payload2) + * + * This is equivalent to: + * 1. sg_init_table + * 2. skb_to_sgvec(payload1) + * 3. sg_unmark_end + * 4. skb_to_sgvec(payload2) + * + * When mapping mutilple payload conditionally, skb_to_sgvec_nomark + * is more preferable. + */ +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len) +{ + return __skb_to_sgvec(skb, sg, offset, len); +} +EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); + int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len); @@ -3413,8 +3458,6 @@ static void sock_rmem_free(struct sk_buff *skb) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { - int len = skb->len; - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)sk->sk_rcvbuf) return -ENOMEM; @@ -3429,7 +3472,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, len); + sk->sk_data_ready(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); @@ -3543,15 +3586,47 @@ static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, return 0; } +#define MAX_TCP_HDR_LEN (15 * 4) + +static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, + typeof(IPPROTO_IP) proto, + unsigned int off) +{ + switch (proto) { + int err; + + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), + off + MAX_TCP_HDR_LEN); + if (!err && !skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, + check))) + err = -EPROTO; + return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; + + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), + off + sizeof(struct udphdr)); + if (!err && !skb_partial_csum_set(skb, off, + offsetof(struct udphdr, + check))) + err = -EPROTO; + return err ? ERR_PTR(err) : &udp_hdr(skb)->check; + } + + return ERR_PTR(-EPROTO); +} + /* This value should be large enough to cover a tagged ethernet header plus * maximally sized IP and TCP or UDP headers. */ #define MAX_IP_HDR_LEN 128 -static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) +static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) { unsigned int off; bool fragment; + __sum16 *csum; int err; fragment = false; @@ -3572,51 +3647,15 @@ static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) if (fragment) goto out; - switch (ip_hdr(skb)->protocol) { - case IPPROTO_TCP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct tcphdr), - MAX_IP_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - tcp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - off, - IPPROTO_TCP, 0); - break; - case IPPROTO_UDP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct udphdr), - MAX_IP_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - udp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - off, - IPPROTO_UDP, 0); - break; - default: - goto out; - } + csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); + if (IS_ERR(csum)) + return PTR_ERR(csum); + if (recalculate) + *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + ip_hdr(skb)->protocol, 0); err = 0; out: @@ -3639,6 +3678,7 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) unsigned int len; bool fragment; bool done; + __sum16 *csum; fragment = false; done = false; @@ -3716,51 +3756,14 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) if (!done || fragment) goto out; - switch (nexthdr) { - case IPPROTO_TCP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct tcphdr), - MAX_IPV6_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len - off, - IPPROTO_TCP, 0); - break; - case IPPROTO_UDP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct udphdr), - MAX_IPV6_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - udp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len - off, - IPPROTO_UDP, 0); - break; - default: - goto out; - } + csum = skb_checksum_setup_ip(skb, nexthdr, off); + if (IS_ERR(csum)) + return PTR_ERR(csum); + if (recalculate) + *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, nexthdr, 0); err = 0; out: @@ -3778,7 +3781,7 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) switch (skb->protocol) { case htons(ETH_P_IP): - err = skb_checksum_setup_ip(skb, recalculate); + err = skb_checksum_setup_ipv4(skb, recalculate); break; case htons(ETH_P_IPV6): @@ -3932,12 +3935,14 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int hdr_len; if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len = tcp_hdrlen(skb); - else - hdr_len = sizeof(struct udphdr); - return hdr_len + shinfo->gso_size; + return tcp_hdrlen(skb) + shinfo->gso_size; + + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. + */ + return shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/core/sock.c b/net/core/sock.c index 0c127dcdf6a..b4fff008136 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -428,7 +428,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb_len); + sk->sk_data_ready(sk); return 0; } EXPORT_SYMBOL(sock_queue_rcv_skb); @@ -1775,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, while (order) { if (npages >= 1 << order) { page = alloc_pages(sk->sk_allocation | - __GFP_COMP | __GFP_NOWARN, + __GFP_COMP | + __GFP_NOWARN | + __GFP_NORETRY, order); if (page) goto fill_page; @@ -1845,7 +1847,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) gfp_t gfp = prio; if (order) - gfp |= __GFP_COMP | __GFP_NOWARN; + gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; pfrag->page = alloc_pages(gfp, order); if (likely(pfrag->page)) { pfrag->offset = 0; @@ -2194,7 +2196,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_unlock(); } -static void sock_def_readable(struct sock *sk, int len) +static void sock_def_readable(struct sock *sk) { struct socket_wq *wq; @@ -2355,10 +2357,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a0e9cf6379d..d7af1885932 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -52,9 +52,10 @@ EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, struct sk_buff *skb, int attrtype) { - struct nlattr *attr; + struct sock_fprog_kern *fprog; struct sk_filter *filter; - unsigned int len; + struct nlattr *attr; + unsigned int flen; int err = 0; if (!ns_capable(user_ns, CAP_NET_ADMIN)) { @@ -63,24 +64,20 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, } rcu_read_lock(); - filter = rcu_dereference(sk->sk_filter); - len = filter ? filter->len * sizeof(struct sock_filter) : 0; + if (!filter) + goto out; - attr = nla_reserve(skb, attrtype, len); + fprog = filter->orig_prog; + flen = sk_filter_proglen(fprog); + + attr = nla_reserve(skb, attrtype, flen); if (attr == NULL) { err = -EMSGSIZE; goto out; } - if (filter) { - struct sock_filter *fb = (struct sock_filter *)nla_data(attr); - int i; - - for (i = 0; i < filter->len; i++, fb++) - sk_decode_filter(&filter->insns[i], fb); - } - + memcpy(nla_data(attr), fprog->filter, flen); out: rcu_read_unlock(); return err; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 661b5a40ec1..6521dfd8b7c 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -23,16 +23,11 @@ #include <linux/skbuff.h> #include <linux/export.h> -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; - static unsigned int classify(const struct sk_buff *skb) { - if (likely(skb->dev && - skb->dev->phydev && + if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter); + return ptp_classify_raw(skb); else return PTP_CLASS_NONE; } @@ -60,11 +55,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) if (likely(phydev->drv->txtstamp)) { if (!atomic_inc_not_zero(&sk->sk_refcnt)) return; + clone = skb_clone(skb, GFP_ATOMIC); if (!clone) { sock_put(sk); return; } + clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -89,12 +86,15 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, } *skb_hwtstamps(skb) = *hwtstamps; + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; + err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); @@ -132,8 +132,3 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) return false; } EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); - -void __init skb_timestamping_init(void) -{ - BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); -} diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index c073b81a1f3..62b5828acde 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -8,7 +8,7 @@ #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -static bool tfrc_debug; +bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index a3d8f7c76ae..40ee7d62b65 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,6 +21,7 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG +extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) diff --git a/net/dccp/input.c b/net/dccp/input.c index 14cdafad7a9..3c8ec7d4a34 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -28,7 +28,7 @@ static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } static void dccp_fin(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 9e2f78bc155..c69eb9c4fbb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -237,7 +237,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening diff --git a/net/dccp/output.c b/net/dccp/output.c index 8876078859d..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2954dcbca83..4c04848953b 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2104,8 +2104,6 @@ static struct notifier_block dn_dev_notifier = { .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); - static struct packet_type dn_dix_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_DNA_RT), .func = dn_route_rcv, @@ -2353,9 +2351,6 @@ static const struct proto_ops dn_proto_ops = { .sendpage = sock_no_sendpage, }; -void dn_register_sysctl(void); -void dn_unregister_sysctl(void); - MODULE_DESCRIPTION("The Linux DECnet Network Protocol"); MODULE_AUTHOR("Linux DECnet Project Team"); MODULE_LICENSE("GPL"); diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index c344163e6ac..fe5f01485d3 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -585,7 +585,6 @@ out: static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue) { int err; - int skb_len; /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK @@ -600,12 +599,11 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig if (err) goto out; - skb_len = skb->len; skb_set_owner_r(skb, sk); skb_queue_tail(queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb_len); + sk->sk_data_ready(sk); out: return err; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index ce0cbbfe0f4..daccc4a36d8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb) return n->output(n, skb); } -static int dn_output(struct sk_buff *skb) +static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -838,6 +838,18 @@ drop: * Used to catch bugs. This should never normally get * called. */ +static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb) +{ + struct dn_skb_cb *cb = DN_SKB_CB(skb); + + net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); + + kfree_skb(skb); + + return NET_RX_DROP; +} + static int dn_rt_bug(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1463,7 +1475,7 @@ make_route: rt->n = neigh; rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug; + rt->dst.output = dn_rt_bug_sk; switch (res.type) { case RTN_UNICAST: rt->dst.input = dn_forward; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index cac505f166d..e5302b7f7ca 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -209,7 +209,7 @@ static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv, /* Address substitution (IEC62439-3 pp 26, 50): replace mac * address of outgoing frame with that of the outgoing slave's. */ - memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN); + ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr); return dev_queue_xmit(skb); } @@ -346,7 +346,7 @@ static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type) /* Payload: MacAddressA */ hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); - memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN); + ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr); dev_queue_xmit(skb); return; @@ -493,7 +493,7 @@ static int check_slave_ok(struct net_device *dev) /* Default multicast address for HSR Supervision frames */ -static const unsigned char def_multicast_addr[ETH_ALEN] = { +static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 }; @@ -519,7 +519,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr_priv->announce_timer.function = hsr_announce; hsr_priv->announce_timer.data = (unsigned long) hsr_priv; - memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN); + ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr); hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; /* FIXME: should I modify the value of these? @@ -547,7 +547,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr_dev->features |= NETIF_F_VLAN_CHALLENGED; /* Set hsr_dev's MAC address to that of mac_slave1 */ - memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN); + ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr); /* Set required header length */ for (i = 0; i < HSR_MAX_SLAVE; i++) { diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 327060c6c87..83e58449366 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -108,8 +108,8 @@ int hsr_create_self_node(struct list_head *self_node_db, if (!node) return -ENOMEM; - memcpy(node->MacAddressA, addr_a, ETH_ALEN); - memcpy(node->MacAddressB, addr_b, ETH_ALEN); + ether_addr_copy(node->MacAddressA, addr_a); + ether_addr_copy(node->MacAddressB, addr_b); rcu_read_lock(); oldnode = list_first_or_null_rcu(self_node_db, @@ -199,7 +199,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, /* Node is known, but frame was received from an unknown * address. Node is PICS_SUBS capable; merge its AddrB. */ - memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); node->AddrB_if = dev_idx; return node; } @@ -208,8 +208,8 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, if (!node) return NULL; - memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN); - memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN); + ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA); + ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source)) node->AddrB_if = dev_idx; else @@ -250,7 +250,7 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) rcu_read_lock(); node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source); if (node) - memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN); + ether_addr_copy(ethhdr->h_source, node->MacAddressA); rcu_read_unlock(); } @@ -272,7 +272,7 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, rcu_read_lock(); node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest); if (node && (node->AddrB_if == dev_idx)) - memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN); + ether_addr_copy(ethhdr->h_dest, node->MacAddressB); rcu_read_unlock(); } @@ -297,7 +297,7 @@ static bool seq_nr_after(u16 a, u16 b) void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) { - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) { WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); return; } @@ -428,13 +428,13 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, node = list_first_or_null_rcu(&hsr_priv->node_db, struct node_entry, mac_list); if (node) - memcpy(addr, node->MacAddressA, ETH_ALEN); + ether_addr_copy(addr, node->MacAddressA); return node; } node = _pos; list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) { - memcpy(addr, node->MacAddressA, ETH_ALEN); + ether_addr_copy(addr, node->MacAddressA); return node; } @@ -462,7 +462,7 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, return -ENOENT; /* No such entry */ } - memcpy(addr_b, node->MacAddressB, ETH_ALEN); + ether_addr_copy(addr_b, node->MacAddressB); tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A]; if (node->time_in_stale[HSR_DEV_SLAVE_A]) diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index af68dd83a4e..3fee5218a69 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -138,8 +138,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, break; if (dev == hsr_priv->slave[0]) - memcpy(hsr_priv->dev->dev_addr, - hsr_priv->slave[0]->dev_addr, ETH_ALEN); + ether_addr_copy(hsr_priv->dev->dev_addr, + hsr_priv->slave[0]->dev_addr); /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(&hsr_priv->self_node_db, @@ -459,7 +459,7 @@ static int __init hsr_init(void) static void __exit hsr_exit(void) { unregister_netdevice_notifier(&hsr_nb); - del_timer(&prune_timer); + del_timer_sync(&prune_timer); hsr_netlink_exit(); dev_remove_pack(&hsr_pt); } diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h deleted file mode 100644 index 2b835db3bda..00000000000 --- a/net/ieee802154/6lowpan.h +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright 2011, Siemens AG - * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> - */ - -/* - * Based on patches from Jon Smirl <jonsmirl@gmail.com> - * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef __6LOWPAN_H__ -#define __6LOWPAN_H__ - -#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ -#define UIP_IPH_LEN 40 /* ipv6 fixed header size */ -#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */ -#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */ - -/* - * ipv6 address based on mac - * second bit-flip (Universe/Local) is done according RFC2464 - */ -#define is_addr_mac_addr_based(a, m) \ - ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ - (((a)->s6_addr[9]) == (m)[1]) && \ - (((a)->s6_addr[10]) == (m)[2]) && \ - (((a)->s6_addr[11]) == (m)[3]) && \ - (((a)->s6_addr[12]) == (m)[4]) && \ - (((a)->s6_addr[13]) == (m)[5]) && \ - (((a)->s6_addr[14]) == (m)[6]) && \ - (((a)->s6_addr[15]) == (m)[7])) - -/* ipv6 address is unspecified */ -#define is_addr_unspecified(a) \ - ((((a)->s6_addr32[0]) == 0) && \ - (((a)->s6_addr32[1]) == 0) && \ - (((a)->s6_addr32[2]) == 0) && \ - (((a)->s6_addr32[3]) == 0)) - -/* compare ipv6 addresses prefixes */ -#define ipaddr_prefixcmp(addr1, addr2, length) \ - (memcmp(addr1, addr2, length >> 3) == 0) - -/* local link, i.e. FE80::/10 */ -#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80)) - -/* - * check whether we can compress the IID to 16 bits, - * it's possible for unicast adresses with first 49 bits are zero only. - */ -#define lowpan_is_iid_16_bit_compressable(a) \ - ((((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr[10]) == 0) && \ - (((a)->s6_addr[11]) == 0xff) && \ - (((a)->s6_addr[12]) == 0xfe) && \ - (((a)->s6_addr[13]) == 0)) - -/* multicast address */ -#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) - -/* check whether the 112-bit gid of the multicast address is mappable to: */ - -/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */ -#define lowpan_is_mcast_addr_compressable(a) \ - ((((a)->s6_addr16[1]) == 0) && \ - (((a)->s6_addr16[2]) == 0) && \ - (((a)->s6_addr16[3]) == 0) && \ - (((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr16[6]) == 0) && \ - (((a)->s6_addr[14]) == 0) && \ - ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2))) - -/* 48 bits, FFXX::00XX:XXXX:XXXX */ -#define lowpan_is_mcast_addr_compressable48(a) \ - ((((a)->s6_addr16[1]) == 0) && \ - (((a)->s6_addr16[2]) == 0) && \ - (((a)->s6_addr16[3]) == 0) && \ - (((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr[10]) == 0)) - -/* 32 bits, FFXX::00XX:XXXX */ -#define lowpan_is_mcast_addr_compressable32(a) \ - ((((a)->s6_addr16[1]) == 0) && \ - (((a)->s6_addr16[2]) == 0) && \ - (((a)->s6_addr16[3]) == 0) && \ - (((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr[12]) == 0)) - -/* 8 bits, FF02::00XX */ -#define lowpan_is_mcast_addr_compressable8(a) \ - ((((a)->s6_addr[1]) == 2) && \ - (((a)->s6_addr16[1]) == 0) && \ - (((a)->s6_addr16[2]) == 0) && \ - (((a)->s6_addr16[3]) == 0) && \ - (((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr16[6]) == 0) && \ - (((a)->s6_addr[14]) == 0)) - -#define lowpan_is_addr_broadcast(a) \ - ((((a)[0]) == 0xFF) && \ - (((a)[1]) == 0xFF) && \ - (((a)[2]) == 0xFF) && \ - (((a)[3]) == 0xFF) && \ - (((a)[4]) == 0xFF) && \ - (((a)[5]) == 0xFF) && \ - (((a)[6]) == 0xFF) && \ - (((a)[7]) == 0xFF)) - -#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */ -#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */ -#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */ -#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */ -#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */ - -#define LOWPAN_DISPATCH_MASK 0xf8 /* 11111000 */ - -#define LOWPAN_FRAG_TIMEOUT (HZ * 60) /* time-out 60 sec */ - -#define LOWPAN_FRAG1_HEAD_SIZE 0x4 -#define LOWPAN_FRAGN_HEAD_SIZE 0x5 - -/* - * According IEEE802.15.4 standard: - * - MTU is 127 octets - * - maximum MHR size is 37 octets - * - MFR size is 2 octets - * - * so minimal payload size that we may guarantee is: - * MTU - MHR - MFR = 88 octets - */ -#define LOWPAN_FRAG_SIZE 88 - -/* - * Values of fields within the IPHC encoding first byte - * (C stands for compressed and I for inline) - */ -#define LOWPAN_IPHC_TF 0x18 - -#define LOWPAN_IPHC_FL_C 0x10 -#define LOWPAN_IPHC_TC_C 0x08 -#define LOWPAN_IPHC_NH_C 0x04 -#define LOWPAN_IPHC_TTL_1 0x01 -#define LOWPAN_IPHC_TTL_64 0x02 -#define LOWPAN_IPHC_TTL_255 0x03 -#define LOWPAN_IPHC_TTL_I 0x00 - - -/* Values of fields within the IPHC encoding second byte */ -#define LOWPAN_IPHC_CID 0x80 - -#define LOWPAN_IPHC_ADDR_00 0x00 -#define LOWPAN_IPHC_ADDR_01 0x01 -#define LOWPAN_IPHC_ADDR_02 0x02 -#define LOWPAN_IPHC_ADDR_03 0x03 - -#define LOWPAN_IPHC_SAC 0x40 -#define LOWPAN_IPHC_SAM 0x30 - -#define LOWPAN_IPHC_SAM_BIT 4 - -#define LOWPAN_IPHC_M 0x08 -#define LOWPAN_IPHC_DAC 0x04 -#define LOWPAN_IPHC_DAM_00 0x00 -#define LOWPAN_IPHC_DAM_01 0x01 -#define LOWPAN_IPHC_DAM_10 0x02 -#define LOWPAN_IPHC_DAM_11 0x03 - -#define LOWPAN_IPHC_DAM_BIT 0 -/* - * LOWPAN_UDP encoding (works together with IPHC) - */ -#define LOWPAN_NHC_UDP_MASK 0xF8 -#define LOWPAN_NHC_UDP_ID 0xF0 -#define LOWPAN_NHC_UDP_CHECKSUMC 0x04 -#define LOWPAN_NHC_UDP_CHECKSUMI 0x00 - -#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0 -#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0 -#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000 -#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00 - -/* values for port compression, _with checksum_ ie bit 5 set to 0 */ -#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */ -#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline, - dest = 0xF0 + 8 bit inline */ -#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, - dest = 16 bit inline */ -#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ -#define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */ - -#ifdef DEBUG -/* print data in line */ -static inline void raw_dump_inline(const char *caller, char *msg, - unsigned char *buf, int len) -{ - if (msg) - pr_debug("%s():%s: ", caller, msg); - - print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, buf, len, false); -} - -/* print data in a table format: - * - * addr: xx xx xx xx xx xx - * addr: xx xx xx xx xx xx - * ... - */ -static inline void raw_dump_table(const char *caller, char *msg, - unsigned char *buf, int len) -{ - if (msg) - pr_debug("%s():%s:\n", caller, msg); - - print_hex_dump_debug("\t", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false); -} -#else -static inline void raw_dump_table(const char *caller, char *msg, - unsigned char *buf, int len) { } -static inline void raw_dump_inline(const char *caller, char *msg, - unsigned char *buf, int len) { } -#endif - -static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) -{ - if (unlikely(!pskb_may_pull(skb, 1))) - return -EINVAL; - - *val = skb->data[0]; - skb_pull(skb, 1); - - return 0; -} - -static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) -{ - if (unlikely(!pskb_may_pull(skb, 2))) - return -EINVAL; - - *val = (skb->data[0] << 8) | skb->data[1]; - skb_pull(skb, 2); - - return 0; -} - -static inline bool lowpan_fetch_skb(struct sk_buff *skb, - void *data, const unsigned int len) -{ - if (unlikely(!pskb_may_pull(skb, len))) - return true; - - skb_copy_from_linear_data(skb, data, len); - skb_pull(skb, len); - - return false; -} - -static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, - const size_t len) -{ - memcpy(*hc_ptr, data, len); - *hc_ptr += len; -} - -typedef int (*skb_delivery_cb)(struct sk_buff *skb, struct net_device *dev); - -int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, - const u8 *saddr, const u8 saddr_type, const u8 saddr_len, - const u8 *daddr, const u8 daddr_type, const u8 daddr_len, - u8 iphc0, u8 iphc1, skb_delivery_cb skb_deliver); -int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len); - -#endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c index 860aa2d445b..211b5686d71 100644 --- a/net/ieee802154/6lowpan_iphc.c +++ b/net/ieee802154/6lowpan_iphc.c @@ -54,11 +54,10 @@ #include <linux/if_arp.h> #include <linux/module.h> #include <linux/netdevice.h> +#include <net/6lowpan.h> #include <net/ipv6.h> #include <net/af_ieee802154.h> -#include "6lowpan.h" - /* * Uncompress address function for source and * destination address(non-multicast). diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan_rtnl.c index 48b25c0af4d..0f5a69ed746 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -1,10 +1,8 @@ -/* - * Copyright 2011, Siemens AG +/* Copyright 2011, Siemens AG * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ -/* - * Based on patches from Jon Smirl <jonsmirl@gmail.com> +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -15,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ /* Jon's code is based on 6lowpan implementation for Contiki which is: @@ -58,9 +52,10 @@ #include <net/af_ieee802154.h> #include <net/ieee802154.h> #include <net/ieee802154_netdev.h> +#include <net/6lowpan.h> #include <net/ipv6.h> -#include "6lowpan.h" +#include "reassembly.h" static LIST_HEAD(lowpan_devices); @@ -68,7 +63,7 @@ static LIST_HEAD(lowpan_devices); struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ struct mutex dev_list_mtx; /* mutex for list ops */ - unsigned short fragment_tag; + __be16 fragment_tag; }; struct lowpan_dev_record { @@ -76,18 +71,6 @@ struct lowpan_dev_record { struct list_head list; }; -struct lowpan_fragment { - struct sk_buff *skb; /* skb to be assembled */ - u16 length; /* length to be assemled */ - u32 bytes_rcv; /* bytes received */ - u16 tag; /* current fragment tag */ - struct timer_list timer; /* assembling timer */ - struct list_head list; /* fragments list */ -}; - -static LIST_HEAD(lowpan_fragments); -static DEFINE_SPINLOCK(flist_lock); - static inline struct lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) { @@ -106,7 +89,6 @@ static int lowpan_header_create(struct sk_buff *skb, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { - struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; struct ieee802154_addr sa, da; @@ -117,8 +99,6 @@ static int lowpan_header_create(struct sk_buff *skb, if (type != ETH_P_IPV6) return 0; - hdr = ipv6_hdr(skb); - if (!saddr) saddr = dev->dev_addr; @@ -127,13 +107,11 @@ static int lowpan_header_create(struct sk_buff *skb, lowpan_header_compress(skb, dev, type, daddr, saddr, len); - /* - * NOTE1: I'm still unsure about the fact that compression and WPAN + /* NOTE1: I'm still unsure about the fact that compression and WPAN * header are created here and not later in the xmit. So wait for * an opinion of net maintainers. */ - /* - * NOTE2: to be absolutely correct, we must derive PANid information + /* NOTE2: to be absolutely correct, we must derive PANid information * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ @@ -141,30 +119,29 @@ static int lowpan_header_create(struct sk_buff *skb, mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); /* prepare wpan address data */ - sa.addr_type = IEEE802154_ADDR_LONG; + sa.mode = IEEE802154_ADDR_LONG; sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - memcpy(&(sa.hwaddr), saddr, 8); /* intra-PAN communications */ - da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + da.pan_id = sa.pan_id; - /* - * if the destination address is the broadcast address, use the + /* if the destination address is the broadcast address, use the * corresponding short address */ if (lowpan_is_addr_broadcast(daddr)) { - da.addr_type = IEEE802154_ADDR_SHORT; - da.short_addr = IEEE802154_ADDR_BROADCAST; + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); } else { - da.addr_type = IEEE802154_ADDR_LONG; - memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); /* request acknowledgment */ mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - type, (void *)&da, (void *)&sa, skb->len); + type, (void *)&da, (void *)&sa, 0); } static int lowpan_give_skb_to_devices(struct sk_buff *skb, @@ -191,73 +168,11 @@ static int lowpan_give_skb_to_devices(struct sk_buff *skb, return stat; } -static void lowpan_fragment_timer_expired(unsigned long entry_addr) -{ - struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr; - - pr_debug("timer expired for frame with tag %d\n", entry->tag); - - list_del(&entry->list); - dev_kfree_skb(entry->skb); - kfree(entry); -} - -static struct lowpan_fragment * -lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) -{ - struct lowpan_fragment *frame; - - frame = kzalloc(sizeof(struct lowpan_fragment), - GFP_ATOMIC); - if (!frame) - goto frame_err; - - INIT_LIST_HEAD(&frame->list); - - frame->length = len; - frame->tag = tag; - - /* allocate buffer for frame assembling */ - frame->skb = netdev_alloc_skb_ip_align(skb->dev, frame->length + - sizeof(struct ipv6hdr)); - - if (!frame->skb) - goto skb_err; - - frame->skb->priority = skb->priority; - - /* reserve headroom for uncompressed ipv6 header */ - skb_reserve(frame->skb, sizeof(struct ipv6hdr)); - skb_put(frame->skb, frame->length); - - /* copy the first control block to keep a - * trace of the link-layer addresses in case - * of a link-local compressed address - */ - memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb)); - - init_timer(&frame->timer); - /* time out is the same as for ipv6 - 60 sec */ - frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT; - frame->timer.data = (unsigned long)frame; - frame->timer.function = lowpan_fragment_timer_expired; - - add_timer(&frame->timer); - - list_add_tail(&frame->list, &lowpan_fragments); - - return frame; - -skb_err: - kfree(frame); -frame_err: - return NULL; -} - -static int process_data(struct sk_buff *skb) +static int process_data(struct sk_buff *skb, const struct ieee802154_hdr *hdr) { u8 iphc0, iphc1; - const struct ieee802154_addr *_saddr, *_daddr; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); /* at least two bytes will be used for the encoding */ @@ -267,108 +182,27 @@ static int process_data(struct sk_buff *skb) if (lowpan_fetch_skb_u8(skb, &iphc0)) goto drop; - /* fragments assembling */ - switch (iphc0 & LOWPAN_DISPATCH_MASK) { - case LOWPAN_DISPATCH_FRAG1: - case LOWPAN_DISPATCH_FRAGN: - { - struct lowpan_fragment *frame; - /* slen stores the rightmost 8 bits of the 11 bits length */ - u8 slen, offset = 0; - u16 len, tag; - bool found = false; - - if (lowpan_fetch_skb_u8(skb, &slen) || /* frame length */ - lowpan_fetch_skb_u16(skb, &tag)) /* fragment tag */ - goto drop; - - /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */ - len = ((iphc0 & 7) << 8) | slen; - - if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) { - pr_debug("%s received a FRAG1 packet (tag: %d, " - "size of the entire IP packet: %d)", - __func__, tag, len); - } else { /* FRAGN */ - if (lowpan_fetch_skb_u8(skb, &offset)) - goto unlock_and_drop; - pr_debug("%s received a FRAGN packet (tag: %d, " - "size of the entire IP packet: %d, " - "offset: %d)", __func__, tag, len, offset * 8); - } - - /* - * check if frame assembling with the same tag is - * already in progress - */ - spin_lock_bh(&flist_lock); - - list_for_each_entry(frame, &lowpan_fragments, list) - if (frame->tag == tag) { - found = true; - break; - } - - /* alloc new frame structure */ - if (!found) { - pr_debug("%s first fragment received for tag %d, " - "begin packet reassembly", __func__, tag); - frame = lowpan_alloc_new_frame(skb, len, tag); - if (!frame) - goto unlock_and_drop; - } - - /* if payload fits buffer, copy it */ - if (likely((offset * 8 + skb->len) <= frame->length)) - skb_copy_to_linear_data_offset(frame->skb, offset * 8, - skb->data, skb->len); - else - goto unlock_and_drop; - - frame->bytes_rcv += skb->len; - - /* frame assembling complete */ - if ((frame->bytes_rcv == frame->length) && - frame->timer.expires > jiffies) { - /* if timer haven't expired - first of all delete it */ - del_timer_sync(&frame->timer); - list_del(&frame->list); - spin_unlock_bh(&flist_lock); - - pr_debug("%s successfully reassembled fragment " - "(tag %d)", __func__, tag); - - dev_kfree_skb(skb); - skb = frame->skb; - kfree(frame); - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - goto drop; - - break; - } - spin_unlock_bh(&flist_lock); - - return kfree_skb(skb), 0; - } - default: - break; - } - if (lowpan_fetch_skb_u8(skb, &iphc1)) goto drop; - _saddr = &mac_cb(skb)->sa; - _daddr = &mac_cb(skb)->da; + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; - return lowpan_process_data(skb, skb->dev, (u8 *)_saddr->hwaddr, - _saddr->addr_type, IEEE802154_ADDR_LEN, - (u8 *)_daddr->hwaddr, _daddr->addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1, - lowpan_give_skb_to_devices); + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_process_data(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1, + lowpan_give_skb_to_devices); -unlock_and_drop: - spin_unlock_bh(&flist_lock); drop: kfree_skb(skb); return -EINVAL; @@ -389,7 +223,7 @@ static int lowpan_set_address(struct net_device *dev, void *p) static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset, int type) + int mlen, int plen, int offset, int type) { struct sk_buff *frag; int hlen; @@ -425,51 +259,68 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, static int lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) { - int err, header_length, payload_length, tag, offset = 0; + int err; + u16 dgram_offset, dgram_size, payload_length, header_length, + lowpan_size, frag_plen, offset; + __be16 tag; u8 head[5]; header_length = skb->mac_len; payload_length = skb->len - header_length; tag = lowpan_dev_info(dev)->fragment_tag++; + lowpan_size = skb_network_header_len(skb); + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + header_length; /* first fragment header */ - head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); - head[1] = payload_length & 0xff; - head[2] = tag >> 8; - head[3] = tag & 0xff; + head[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x7); + head[1] = dgram_size & 0xff; + memcpy(head + 2, &tag, sizeof(tag)); - err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE, - 0, LOWPAN_DISPATCH_FRAG1); + /* calc the nearest payload length(divided to 8) for first fragment + * which fits into a IEEE802154_MTU + */ + frag_plen = round_down(IEEE802154_MTU - header_length - + LOWPAN_FRAG1_HEAD_SIZE - lowpan_size - + IEEE802154_MFR_SIZE, 8); + err = lowpan_fragment_xmit(skb, head, header_length, + frag_plen + lowpan_size, 0, + LOWPAN_DISPATCH_FRAG1); if (err) { pr_debug("%s unable to send FRAG1 packet (tag: %d)", __func__, tag); goto exit; } - offset = LOWPAN_FRAG_SIZE; + offset = lowpan_size + frag_plen; + dgram_offset += frag_plen; /* next fragment header */ head[0] &= ~LOWPAN_DISPATCH_FRAG1; head[0] |= LOWPAN_DISPATCH_FRAGN; + frag_plen = round_down(IEEE802154_MTU - header_length - + LOWPAN_FRAGN_HEAD_SIZE - IEEE802154_MFR_SIZE, 8); + while (payload_length - offset > 0) { - int len = LOWPAN_FRAG_SIZE; + int len = frag_plen; - head[4] = offset / 8; + head[4] = dgram_offset >> 3; if (payload_length - offset < len) len = payload_length - offset; - err = lowpan_fragment_xmit(skb, head, header_length, - len, offset, LOWPAN_DISPATCH_FRAGN); + err = lowpan_fragment_xmit(skb, head, header_length, len, + offset, LOWPAN_DISPATCH_FRAGN); if (err) { - pr_debug("%s unable to send a subsequent FRAGN packet " - "(tag: %d, offset: %d", __func__, tag, offset); + pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", + __func__, tag, offset); goto exit; } offset += len; + dgram_offset += len; } exit: @@ -511,13 +362,13 @@ static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) return ieee802154_mlme_ops(real_dev)->get_phy(real_dev); } -static u16 lowpan_get_pan_id(const struct net_device *dev) +static __le16 lowpan_get_pan_id(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); } -static u16 lowpan_get_short_addr(const struct net_device *dev) +static __le16 lowpan_get_short_addr(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); @@ -533,7 +384,27 @@ static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_set_mac_address = lowpan_set_address, }; @@ -576,45 +447,55 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct sk_buff *local_skb; + struct ieee802154_hdr hdr; + int ret; - if (!netif_running(dev)) + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) goto drop; + if (!netif_running(dev)) + goto drop_skb; + if (dev->type != ARPHRD_IEEE802154) - goto drop; + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; /* check that it's our buffer */ if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Copy the packet so that the IPv6 header is - * properly aligned. - */ - local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, - skb_tailroom(skb), GFP_ATOMIC); - if (!local_skb) - goto drop; - - local_skb->protocol = htons(ETH_P_IPV6); - local_skb->pkt_type = PACKET_HOST; + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; /* Pull off the 1-byte of 6lowpan header. */ - skb_pull(local_skb, 1); - - lowpan_give_skb_to_devices(local_skb, NULL); + skb_pull(skb, 1); - kfree_skb(local_skb); - kfree_skb(skb); + ret = lowpan_give_skb_to_devices(skb, NULL); + if (ret == NET_RX_DROP) + goto drop; } else { switch (skb->data[0] & 0xe0) { case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = process_data(skb, &hdr); + if (ret == NET_RX_DROP) + goto drop; + break; case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = process_data(skb, &hdr); + if (ret == NET_RX_DROP) + goto drop; + } + break; case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - local_skb = skb_clone(skb, GFP_ATOMIC); - if (!local_skb) - goto drop; - process_data(local_skb); - - kfree_skb(skb); + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = process_data(skb, &hdr); + if (ret == NET_RX_DROP) + goto drop; + } break; default: break; @@ -622,9 +503,9 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, } return NET_RX_SUCCESS; - -drop: +drop_skb: kfree_skb(skb); +drop: return NET_RX_DROP; } @@ -648,10 +529,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, } lowpan_dev_info(dev)->real_dev = real_dev; - lowpan_dev_info(dev)->fragment_tag = 0; mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { dev_put(real_dev); lowpan_dev_info(dev)->real_dev = NULL; @@ -744,7 +624,7 @@ static struct notifier_block lowpan_dev_notifier = { }; static struct packet_type lowpan_packet_type = { - .type = __constant_htons(ETH_P_IEEE802154), + .type = htons(ETH_P_IEEE802154), .func = lowpan_rcv, }; @@ -752,43 +632,40 @@ static int __init lowpan_init_module(void) { int err = 0; - err = lowpan_netlink_init(); + err = lowpan_net_frag_init(); if (err < 0) goto out; + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + dev_add_pack(&lowpan_packet_type); err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) { - dev_remove_pack(&lowpan_packet_type); - lowpan_netlink_fini(); - } + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + dev_remove_pack(&lowpan_packet_type); + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); out: return err; } static void __exit lowpan_cleanup_module(void) { - struct lowpan_fragment *frame, *tframe; - lowpan_netlink_fini(); dev_remove_pack(&lowpan_packet_type); - unregister_netdevice_notifier(&lowpan_dev_notifier); + lowpan_net_frag_exit(); - /* Now 6lowpan packet_type is removed, so no new fragments are - * expected on RX, therefore that's the time to clean incomplete - * fragments. - */ - spin_lock_bh(&flist_lock); - list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) { - del_timer_sync(&frame->timer); - list_del(&frame->list); - dev_kfree_skb(frame->skb); - kfree(frame); - } - spin_unlock_bh(&flist_lock); + unregister_netdevice_notifier(&lowpan_dev_notifier); } module_init(lowpan_init_module); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 9c9879d5ea6..8af1330b313 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -15,7 +15,7 @@ config IEEE802154_6LOWPAN depends on IEEE802154 && IPV6 select 6LOWPAN_IPHC ---help--- - IPv6 compression over IEEE 802.15.4. + IPv6 compression over IEEE 802.15.4. config 6LOWPAN_IPHC tristate diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index e8f05885ced..bf1b51497a4 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -2,5 +2,9 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o -ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o +6lowpan-y := 6lowpan_rtnl.o reassembly.o +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o \ + header_ops.o af_802154-y := af_ieee802154.o raw.o dgram.o + +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h index b1ec5253752..8330a09bfc9 100644 --- a/net/ieee802154/af802154.h +++ b/net/ieee802154/af802154.h @@ -25,12 +25,13 @@ #define AF802154_H struct sk_buff; -struct net_devce; +struct net_device; +struct ieee802154_addr; extern struct proto ieee802154_raw_prot; extern struct proto ieee802154_dgram_prot; void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); struct net_device *ieee802154_get_dev(struct net *net, - struct ieee802154_addr *addr); + const struct ieee802154_addr *addr); #endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 40e606f3788..351d9a94ec2 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -43,25 +43,27 @@ /* * Utility function for families */ -struct net_device *ieee802154_get_dev(struct net *net, - struct ieee802154_addr *addr) +struct net_device* +ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) { struct net_device *dev = NULL; struct net_device *tmp; - u16 pan_id, short_addr; + __le16 pan_id, short_addr; + u8 hwaddr[IEEE802154_ADDR_LEN]; - switch (addr->addr_type) { + switch (addr->mode) { case IEEE802154_ADDR_LONG: + ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); rcu_read_lock(); - dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); if (dev) dev_hold(dev); rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: - if (addr->pan_id == 0xffff || - addr->short_addr == IEEE802154_ADDR_UNDEF || - addr->short_addr == 0xffff) + if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) break; rtnl_lock(); @@ -86,7 +88,7 @@ struct net_device *ieee802154_get_dev(struct net *net, break; default: pr_warning("Unsupported ieee802154 address type: %d\n", - addr->addr_type); + addr->mode); break; } @@ -326,7 +328,7 @@ drop: static struct packet_type ieee802154_packet_type = { - .type = __constant_htons(ETH_P_IEEE802154), + .type = htons(ETH_P_IEEE802154), .func = ieee802154_rcv, }; diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1846c1fe0d0..786437bc0c0 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -73,10 +73,10 @@ static int dgram_init(struct sock *sk) { struct dgram_sock *ro = dgram_sk(sk); - ro->dst_addr.addr_type = IEEE802154_ADDR_LONG; - ro->dst_addr.pan_id = 0xffff; + ro->dst_addr.mode = IEEE802154_ADDR_LONG; + ro->dst_addr.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); ro->want_ack = 1; - memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr)); + memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); return 0; } @@ -88,6 +88,7 @@ static void dgram_close(struct sock *sk, long timeout) static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr haddr; struct dgram_sock *ro = dgram_sk(sk); int err = -EINVAL; struct net_device *dev; @@ -102,7 +103,8 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) if (addr->family != AF_IEEE802154) goto out; - dev = ieee802154_get_dev(sock_net(sk), &addr->addr); + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); if (!dev) { err = -ENODEV; goto out; @@ -113,7 +115,7 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) goto out_put; } - memcpy(&ro->src_addr, &addr->addr, sizeof(struct ieee802154_addr)); + ro->src_addr = haddr; ro->bound = 1; err = 0; @@ -149,8 +151,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) * of this packet since that is all * that will be read. */ - /* FIXME: parse the header for more correct value */ - amount = skb->len - (3+8+8); + amount = skb->len - ieee802154_hdr_length(skb); } spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); @@ -181,7 +182,7 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, goto out; } - memcpy(&ro->dst_addr, &addr->addr, sizeof(struct ieee802154_addr)); + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); out: release_sock(sk); @@ -194,8 +195,8 @@ static int dgram_disconnect(struct sock *sk, int flags) lock_sock(sk); - ro->dst_addr.addr_type = IEEE802154_ADDR_LONG; - memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr)); + ro->dst_addr.mode = IEEE802154_ADDR_LONG; + memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); release_sock(sk); @@ -232,7 +233,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, if (size > mtu) { pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; + err = -EMSGSIZE; goto out_dev; } @@ -312,7 +313,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, if (saddr) { saddr->family = AF_IEEE802154; - saddr->addr = mac_cb(skb)->sa; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); *addr_len = sizeof(*saddr); } @@ -328,6 +329,10 @@ out: static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; @@ -336,40 +341,43 @@ static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } -static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, - u16 short_addr, struct dgram_sock *ro) +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) { if (!ro->bound) - return 1; + return true; - if (ro->src_addr.addr_type == IEEE802154_ADDR_LONG && - !memcmp(ro->src_addr.hwaddr, hw_addr, IEEE802154_ADDR_LEN)) - return 1; + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; - if (ro->src_addr.addr_type == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return 1; + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; - return 0; + return false; } int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; int ret = NET_RX_SUCCESS; - u16 pan_id, short_addr; + __le16 pan_id, short_addr; + __le64 hw_addr; /* Data frame processing */ BUG_ON(dev->type != ARPHRD_IEEE802154); pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); read_lock(&dgram_lock); sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, - dgram_sk(sk))) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { if (prev) { struct sk_buff *clone; clone = skb_clone(skb, GFP_ATOMIC); diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c new file mode 100644 index 00000000000..bed42a48408 --- /dev/null +++ b/net/ieee802154/header_ops.c @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2014 Fraunhofer ITWM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> + */ + +#include <net/mac802154.h> +#include <net/ieee802154.h> +#include <net/ieee802154_netdev.h> + +static int +ieee802154_hdr_push_addr(u8 *buf, const struct ieee802154_addr *addr, + bool omit_pan) +{ + int pos = 0; + + if (addr->mode == IEEE802154_ADDR_NONE) + return 0; + + if (!omit_pan) { + memcpy(buf + pos, &addr->pan_id, 2); + pos += 2; + } + + switch (addr->mode) { + case IEEE802154_ADDR_SHORT: + memcpy(buf + pos, &addr->short_addr, 2); + pos += 2; + break; + + case IEEE802154_ADDR_LONG: + memcpy(buf + pos, &addr->extended_addr, IEEE802154_ADDR_LEN); + pos += IEEE802154_ADDR_LEN; + break; + + default: + return -EINVAL; + } + + return pos; +} + +static int +ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr) +{ + int pos = 5; + + memcpy(buf, hdr, 1); + memcpy(buf + 1, &hdr->frame_counter, 4); + + switch (hdr->key_id_mode) { + case IEEE802154_SCF_KEY_IMPLICIT: + return pos; + + case IEEE802154_SCF_KEY_INDEX: + break; + + case IEEE802154_SCF_KEY_SHORT_INDEX: + memcpy(buf + pos, &hdr->short_src, 4); + pos += 4; + break; + + case IEEE802154_SCF_KEY_HW_INDEX: + memcpy(buf + pos, &hdr->extended_src, IEEE802154_ADDR_LEN); + pos += IEEE802154_ADDR_LEN; + break; + } + + buf[pos++] = hdr->key_id; + + return pos; +} + +int +ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 buf[MAC802154_FRAME_HARD_HEADER_LEN]; + int pos = 2; + int rc; + struct ieee802154_hdr_fc fc = hdr->fc; + + buf[pos++] = hdr->seq; + + fc.dest_addr_mode = hdr->dest.mode; + + rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false); + if (rc < 0) + return -EINVAL; + pos += rc; + + fc.source_addr_mode = hdr->source.mode; + + if (hdr->source.pan_id == hdr->dest.pan_id && + hdr->dest.mode != IEEE802154_ADDR_NONE) + fc.intra_pan = true; + + rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan); + if (rc < 0) + return -EINVAL; + pos += rc; + + if (fc.security_enabled) { + fc.version = 1; + + rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec); + if (rc < 0) + return -EINVAL; + + pos += rc; + } + + memcpy(buf, &fc, 2); + + memcpy(skb_push(skb, pos), buf, pos); + + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_push); + +static int +ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan, + struct ieee802154_addr *addr) +{ + int pos = 0; + + addr->mode = mode; + + if (mode == IEEE802154_ADDR_NONE) + return 0; + + if (!omit_pan) { + memcpy(&addr->pan_id, buf + pos, 2); + pos += 2; + } + + if (mode == IEEE802154_ADDR_SHORT) { + memcpy(&addr->short_addr, buf + pos, 2); + return pos + 2; + } else { + memcpy(&addr->extended_addr, buf + pos, IEEE802154_ADDR_LEN); + return pos + IEEE802154_ADDR_LEN; + } +} + +static int ieee802154_hdr_addr_len(int mode, bool omit_pan) +{ + int pan_len = omit_pan ? 0 : 2; + + switch (mode) { + case IEEE802154_ADDR_NONE: return 0; + case IEEE802154_ADDR_SHORT: return 2 + pan_len; + case IEEE802154_ADDR_LONG: return IEEE802154_ADDR_LEN + pan_len; + default: return -EINVAL; + } +} + +static int +ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) +{ + int pos = 5; + + memcpy(hdr, buf, 1); + memcpy(&hdr->frame_counter, buf + 1, 4); + + switch (hdr->key_id_mode) { + case IEEE802154_SCF_KEY_IMPLICIT: + return pos; + + case IEEE802154_SCF_KEY_INDEX: + break; + + case IEEE802154_SCF_KEY_SHORT_INDEX: + memcpy(&hdr->short_src, buf + pos, 4); + pos += 4; + break; + + case IEEE802154_SCF_KEY_HW_INDEX: + memcpy(&hdr->extended_src, buf + pos, IEEE802154_ADDR_LEN); + pos += IEEE802154_ADDR_LEN; + break; + } + + hdr->key_id = buf[pos++]; + + return pos; +} + +static int ieee802154_hdr_sechdr_len(u8 sc) +{ + switch (IEEE802154_SCF_KEY_ID_MODE(sc)) { + case IEEE802154_SCF_KEY_IMPLICIT: return 5; + case IEEE802154_SCF_KEY_INDEX: return 6; + case IEEE802154_SCF_KEY_SHORT_INDEX: return 10; + case IEEE802154_SCF_KEY_HW_INDEX: return 14; + default: return -EINVAL; + } +} + +static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) +{ + int dlen, slen; + + dlen = ieee802154_hdr_addr_len(hdr->fc.dest_addr_mode, false); + slen = ieee802154_hdr_addr_len(hdr->fc.source_addr_mode, + hdr->fc.intra_pan); + + if (slen < 0 || dlen < 0) + return -EINVAL; + + return 3 + dlen + slen + hdr->fc.security_enabled; +} + +static int +ieee802154_hdr_get_addrs(const u8 *buf, struct ieee802154_hdr *hdr) +{ + int pos = 0; + + pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.dest_addr_mode, + false, &hdr->dest); + pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.source_addr_mode, + hdr->fc.intra_pan, &hdr->source); + + if (hdr->fc.intra_pan) + hdr->source.pan_id = hdr->dest.pan_id; + + return pos; +} + +int +ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr) +{ + int pos = 3, rc; + + if (!pskb_may_pull(skb, 3)) + return -EINVAL; + + memcpy(hdr, skb->data, 3); + + rc = ieee802154_hdr_minlen(hdr); + if (rc < 0 || !pskb_may_pull(skb, rc)) + return -EINVAL; + + pos += ieee802154_hdr_get_addrs(skb->data + pos, hdr); + + if (hdr->fc.security_enabled) { + int want = pos + ieee802154_hdr_sechdr_len(skb->data[pos]); + + if (!pskb_may_pull(skb, want)) + return -EINVAL; + + pos += ieee802154_hdr_get_sechdr(skb->data + pos, &hdr->sec); + } + + skb_pull(skb, pos); + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_pull); + +int +ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) +{ + const u8 *buf = skb_mac_header(skb); + int pos = 3, rc; + + if (buf + 3 > skb_tail_pointer(skb)) + return -EINVAL; + + memcpy(hdr, buf, 3); + + rc = ieee802154_hdr_minlen(hdr); + if (rc < 0 || buf + rc > skb_tail_pointer(skb)) + return -EINVAL; + + pos += ieee802154_hdr_get_addrs(buf + pos, hdr); + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs); diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index cee4425b995..6693a5cf01c 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -66,5 +66,6 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info); int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info); int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); +int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 43f1b2bf469..04b20589d97 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -123,6 +123,7 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), + IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index ba5c1e002f3..5d285498c0f 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -39,6 +39,26 @@ #include "ieee802154.h" +static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr) +{ + return nla_put_u64(msg, type, swab64((__force u64)hwaddr)); +} + +static __le64 nla_get_hwaddr(const struct nlattr *nla) +{ + return ieee802154_devaddr_from_raw(nla_data(nla)); +} + +static int nla_put_shortaddr(struct sk_buff *msg, int type, __le16 addr) +{ + return nla_put_u16(msg, type, le16_to_cpu(addr)); +} + +static __le16 nla_get_shortaddr(const struct nlattr *nla) +{ + return cpu_to_le16(nla_get_u16(nla)); +} + int ieee802154_nl_assoc_indic(struct net_device *dev, struct ieee802154_addr *addr, u8 cap) { @@ -46,7 +66,7 @@ int ieee802154_nl_assoc_indic(struct net_device *dev, pr_debug("%s\n", __func__); - if (addr->addr_type != IEEE802154_ADDR_LONG) { + if (addr->mode != IEEE802154_ADDR_LONG) { pr_err("%s: received non-long source address!\n", __func__); return -EINVAL; } @@ -59,8 +79,8 @@ int ieee802154_nl_assoc_indic(struct net_device *dev, nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || - nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, - addr->hwaddr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR, + addr->extended_addr) || nla_put_u8(msg, IEEE802154_ATTR_CAPABILITY, cap)) goto nla_put_failure; @@ -72,7 +92,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_assoc_indic); -int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, +int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr, u8 status) { struct sk_buff *msg; @@ -87,7 +107,7 @@ int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || - nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) || + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) || nla_put_u8(msg, IEEE802154_ATTR_STATUS, status)) goto nla_put_failure; return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP); @@ -114,13 +134,13 @@ int ieee802154_nl_disassoc_indic(struct net_device *dev, nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr)) goto nla_put_failure; - if (addr->addr_type == IEEE802154_ADDR_LONG) { - if (nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, - addr->hwaddr)) + if (addr->mode == IEEE802154_ADDR_LONG) { + if (nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR, + addr->extended_addr)) goto nla_put_failure; } else { - if (nla_put_u16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR, - addr->short_addr)) + if (nla_put_shortaddr(msg, IEEE802154_ATTR_SRC_SHORT_ADDR, + addr->short_addr)) goto nla_put_failure; } if (nla_put_u8(msg, IEEE802154_ATTR_REASON, reason)) @@ -157,8 +177,8 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm); -int ieee802154_nl_beacon_indic(struct net_device *dev, - u16 panid, u16 coord_addr) +int ieee802154_nl_beacon_indic(struct net_device *dev, __le16 panid, + __le16 coord_addr) { struct sk_buff *msg; @@ -172,8 +192,9 @@ int ieee802154_nl_beacon_indic(struct net_device *dev, nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || - nla_put_u16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr) || - nla_put_u16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid)) + nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, + coord_addr) || + nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_PAN_ID, panid)) goto nla_put_failure; return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP); @@ -243,6 +264,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, { void *hdr; struct wpan_phy *phy; + struct ieee802154_mlme_ops *ops; + __le16 short_addr, pan_id; pr_debug("%s\n", __func__); @@ -251,19 +274,45 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, if (!hdr) goto out; - phy = ieee802154_mlme_ops(dev)->get_phy(dev); + ops = ieee802154_mlme_ops(dev); + phy = ops->get_phy(dev); BUG_ON(!phy); + short_addr = ops->get_short_addr(dev); + pan_id = ops->get_pan_id(dev); + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, dev->dev_addr) || - nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR, - ieee802154_mlme_ops(dev)->get_short_addr(dev)) || - nla_put_u16(msg, IEEE802154_ATTR_PAN_ID, - ieee802154_mlme_ops(dev)->get_pan_id(dev))) + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) || + nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, pan_id)) goto nla_put_failure; + + if (ops->get_mac_params) { + struct ieee802154_mac_params params; + + ops->get_mac_params(dev, ¶ms); + + if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, + params.transmit_power) || + nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || + nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, + params.cca_mode) || + nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, + params.cca_ed_level) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, + params.csma_retries) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, + params.min_be) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, + params.max_be) || + nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, + params.frame_retries)) + goto nla_put_failure; + } + wpan_phy_put(phy); return genlmsg_end(msg, hdr); @@ -322,16 +371,16 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info) goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, - info->attrs[IEEE802154_ATTR_COORD_HW_ADDR], - IEEE802154_ADDR_LEN); + addr.mode = IEEE802154_ADDR_LONG; + addr.extended_addr = nla_get_hwaddr( + info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]); } else { - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( + addr.mode = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); } - addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + addr.pan_id = nla_get_shortaddr( + info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); if (info->attrs[IEEE802154_ATTR_PAGE]) page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); @@ -365,14 +414,13 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info) if (!ieee802154_mlme_ops(dev)->assoc_resp) goto out; - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], - IEEE802154_ADDR_LEN); + addr.mode = IEEE802154_ADDR_LONG; + addr.extended_addr = nla_get_hwaddr( + info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]); addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, - nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), + nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); out: @@ -398,13 +446,12 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info) goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, - info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], - IEEE802154_ADDR_LEN); + addr.mode = IEEE802154_ADDR_LONG; + addr.extended_addr = nla_get_hwaddr( + info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]); } else { - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( + addr.mode = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); } addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); @@ -449,10 +496,11 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) if (!ieee802154_mlme_ops(dev)->start_req) goto out; - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( + addr.mode = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); - addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + addr.pan_id = nla_get_shortaddr( + info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]); bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]); @@ -467,7 +515,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) page = 0; - if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { + if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); dev_put(dev); return -EINVAL; @@ -577,3 +625,93 @@ cont: return skb->len; } + +int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_mac_params params; + struct wpan_phy *phy; + int rc = -EINVAL; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + + if (!ops->get_mac_params || !ops->set_mac_params) { + rc = -EOPNOTSUPP; + goto out; + } + + if (netif_running(dev)) { + rc = -EBUSY; + goto out; + } + + if (!info->attrs[IEEE802154_ATTR_LBT_ENABLED] && + !info->attrs[IEEE802154_ATTR_CCA_MODE] && + !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] && + !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] && + !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] && + !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] && + !info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) + goto out; + + phy = ops->get_phy(dev); + + if ((!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || + (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) || + (!phy->set_cca_ed_level && + info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) || + (!phy->set_csma_params && + (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] || + info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] || + info->attrs[IEEE802154_ATTR_CSMA_MAX_BE])) || + (!phy->set_frame_retries && + info->attrs[IEEE802154_ATTR_FRAME_RETRIES])) { + rc = -EOPNOTSUPP; + goto out_phy; + } + + ops->get_mac_params(dev, ¶ms); + + if (info->attrs[IEEE802154_ATTR_TXPOWER]) + params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); + + if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) + params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); + + if (info->attrs[IEEE802154_ATTR_CCA_MODE]) + params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + + if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) + params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); + + if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) + params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); + + if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]) + params.min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]); + + if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) + params.max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]); + + if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) + params.frame_retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]); + + rc = ops->set_mac_params(dev, ¶ms); + + wpan_phy_put(phy); + dev_put(dev); + return rc; + +out_phy: + wpan_phy_put(phy); +out: + dev_put(dev); + return rc; +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 6adda4d46f9..fd7be5e45ce 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -52,5 +52,15 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, [IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, }, + + [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, }, + [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, }, + [IEEE802154_ATTR_CSMA_RETRIES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CSMA_MIN_BE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, + + [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, }; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 41f538b8e59..74d54fae33d 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> #include "af802154.h" @@ -55,21 +56,24 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len) +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) { - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; int err = 0; struct net_device *dev = NULL; - if (len < sizeof(*addr)) + if (len < sizeof(*uaddr)) return -EINVAL; - if (addr->family != AF_IEEE802154) + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) return -EINVAL; lock_sock(sk); - dev = ieee802154_get_dev(sock_net(sk), &addr->addr); + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); if (!dev) { err = -ENODEV; goto out; @@ -209,6 +213,10 @@ out: static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c new file mode 100644 index 00000000000..ef2d54372b1 --- /dev/null +++ b/net/ieee802154/reassembly.c @@ -0,0 +1,571 @@ +/* 6LoWPAN fragment reassembly + * + * + * Authors: + * Alexander Aring <aar@pengutronix.de> + * + * Based on: net/ipv6/reassembly.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "6LoWPAN: " fmt + +#include <linux/net.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/random.h> +#include <linux/jhash.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/export.h> + +#include <net/ieee802154_netdev.h> +#include <net/6lowpan.h> +#include <net/ipv6.h> +#include <net/inet_frag.h> + +#include "reassembly.h" + +struct lowpan_frag_info { + __be16 d_tag; + u16 d_size; + u8 d_offset; +}; + +struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) +{ + return (struct lowpan_frag_info *)skb->cb; +} + +static struct inet_frags lowpan_frags; + +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, + struct sk_buff *prev, struct net_device *dev); + +static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size, + const struct ieee802154_addr *saddr, + const struct ieee802154_addr *daddr) +{ + u32 c; + + net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); + c = jhash_3words(ieee802154_addr_hash(saddr), + ieee802154_addr_hash(daddr), + (__force u32)(tag + (d_size << 16)), + lowpan_frags.rnd); + + return c & (INETFRAGS_HASHSZ - 1); +} + +static unsigned int lowpan_hashfn(struct inet_frag_queue *q) +{ + struct lowpan_frag_queue *fq; + + fq = container_of(q, struct lowpan_frag_queue, q); + return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); +} + +static bool lowpan_frag_match(struct inet_frag_queue *q, void *a) +{ + struct lowpan_frag_queue *fq; + struct lowpan_create_arg *arg = a; + + fq = container_of(q, struct lowpan_frag_queue, q); + return fq->tag == arg->tag && fq->d_size == arg->d_size && + ieee802154_addr_equal(&fq->saddr, arg->src) && + ieee802154_addr_equal(&fq->daddr, arg->dst); +} + +static void lowpan_frag_init(struct inet_frag_queue *q, void *a) +{ + struct lowpan_frag_queue *fq; + struct lowpan_create_arg *arg = a; + + fq = container_of(q, struct lowpan_frag_queue, q); + + fq->tag = arg->tag; + fq->d_size = arg->d_size; + fq->saddr = *arg->src; + fq->daddr = *arg->dst; +} + +static void lowpan_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); + + spin_lock(&fq->q.lock); + + if (fq->q.last_in & INET_FRAG_COMPLETE) + goto out; + + inet_frag_kill(&fq->q, &lowpan_frags); +out: + spin_unlock(&fq->q.lock); + inet_frag_put(&fq->q, &lowpan_frags); +} + +static inline struct lowpan_frag_queue * +fq_find(struct net *net, const struct lowpan_frag_info *frag_info, + const struct ieee802154_addr *src, + const struct ieee802154_addr *dst) +{ + struct inet_frag_queue *q; + struct lowpan_create_arg arg; + unsigned int hash; + + arg.tag = frag_info->d_tag; + arg.d_size = frag_info->d_size; + arg.src = src; + arg.dst = dst; + + read_lock(&lowpan_frags.lock); + hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); + + q = inet_frag_find(&net->ieee802154_lowpan.frags, + &lowpan_frags, &arg, hash); + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } + return container_of(q, struct lowpan_frag_queue, q); +} + +static int lowpan_frag_queue(struct lowpan_frag_queue *fq, + struct sk_buff *skb, const u8 frag_type) +{ + struct sk_buff *prev, *next; + struct net_device *dev; + int end, offset; + + if (fq->q.last_in & INET_FRAG_COMPLETE) + goto err; + + offset = lowpan_cb(skb)->d_offset << 3; + end = lowpan_cb(skb)->d_size; + + /* Is this the final fragment? */ + if (offset + skb->len == end) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->q.len || + ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + goto err; + fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.len = end; + } else { + if (end > fq->q.len) { + /* Some bits beyond end -> corruption. */ + if (fq->q.last_in & INET_FRAG_LAST_IN) + goto err; + fq->q.len = end; + } + } + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = fq->q.fragments_tail; + if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { + next = NULL; + goto found; + } + prev = NULL; + for (next = fq->q.fragments; next != NULL; next = next->next) { + if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) + break; /* bingo! */ + prev = next; + } + +found: + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (!next) + fq->q.fragments_tail = skb; + if (prev) + prev->next = skb; + else + fq->q.fragments = skb; + + dev = skb->dev; + if (dev) + skb->dev = NULL; + + fq->q.stamp = skb->tstamp; + if (frag_type == LOWPAN_DISPATCH_FRAG1) { + /* Calculate uncomp. 6lowpan header to estimate full size */ + fq->q.meat += lowpan_uncompress_size(skb, NULL); + fq->q.last_in |= INET_FRAG_FIRST_IN; + } else { + fq->q.meat += skb->len; + } + add_frag_mem_limit(&fq->q, skb->truesize); + + if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = lowpan_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + + inet_frag_lru_move(&fq->q); + return -1; +err: + kfree_skb(skb); + return -1; +} + +/* Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. + */ +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, + struct net_device *dev) +{ + struct sk_buff *fp, *head = fq->q.fragments; + int sum_truesize; + + inet_frag_kill(&fq->q, &lowpan_frags); + + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + consume_skb(fq->q.fragments); + fq->q.fragments = head; + } + + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + goto out_oom; + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. + */ + if (skb_has_frag_list(head)) { + struct sk_buff *clone; + int i, plen = 0; + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + goto out_oom; + clone->next = head->next; + head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->len = head->data_len - plen; + clone->data_len = clone->len; + head->data_len -= clone->len; + head->len -= clone->len; + add_frag_mem_limit(&fq->q, clone->truesize); + } + + WARN_ON(head == NULL); + + sum_truesize = head->truesize; + for (fp = head->next; fp;) { + bool headstolen; + int delta; + struct sk_buff *next = fp->next; + + sum_truesize += fp->truesize; + if (skb_try_coalesce(head, fp, &headstolen, &delta)) { + kfree_skb_partial(fp, headstolen); + } else { + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = fp; + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + } + fp = next; + } + sub_frag_mem_limit(&fq->q, sum_truesize); + + head->next = NULL; + head->dev = dev; + head->tstamp = fq->q.stamp; + + fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; + + return 1; +out_oom: + net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n"); + return -1; +} + +static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, + struct lowpan_frag_info *frag_info) +{ + bool fail; + u8 pattern = 0, low = 0; + + fail = lowpan_fetch_skb(skb, &pattern, 1); + fail |= lowpan_fetch_skb(skb, &low, 1); + frag_info->d_size = (pattern & 7) << 8 | low; + fail |= lowpan_fetch_skb(skb, &frag_info->d_tag, 2); + + if (frag_type == LOWPAN_DISPATCH_FRAGN) { + fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); + } else { + skb_reset_network_header(skb); + frag_info->d_offset = 0; + } + + if (unlikely(fail)) + return -EIO; + + return 0; +} + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) +{ + struct lowpan_frag_queue *fq; + struct net *net = dev_net(skb->dev); + struct lowpan_frag_info *frag_info = lowpan_cb(skb); + struct ieee802154_addr source, dest; + int err; + + source = mac_cb(skb)->source; + dest = mac_cb(skb)->dest; + + err = lowpan_get_frag_info(skb, frag_type, frag_info); + if (err < 0) + goto err; + + if (frag_info->d_size > net->ieee802154_lowpan.max_dsize) + goto err; + + inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false); + + fq = fq_find(net, frag_info, &source, &dest); + if (fq != NULL) { + int ret; + spin_lock(&fq->q.lock); + ret = lowpan_frag_queue(fq, skb, frag_type); + spin_unlock(&fq->q.lock); + + inet_frag_put(&fq->q, &lowpan_frags); + return ret; + } + +err: + kfree_skb(skb); + return -1; +} +EXPORT_SYMBOL(lowpan_frag_rcv); + +#ifdef CONFIG_SYSCTL +static struct ctl_table lowpan_frags_ns_ctl_table[] = { + { + .procname = "6lowpanfrag_high_thresh", + .data = &init_net.ieee802154_lowpan.frags.high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "6lowpanfrag_low_thresh", + .data = &init_net.ieee802154_lowpan.frags.low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "6lowpanfrag_time", + .data = &init_net.ieee802154_lowpan.frags.timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "6lowpanfrag_max_datagram_size", + .data = &init_net.ieee802154_lowpan.max_dsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { } +}; + +static struct ctl_table lowpan_frags_ctl_table[] = { + { + .procname = "6lowpanfrag_secret_interval", + .data = &lowpan_frags.secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = lowpan_frags_ns_ctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ieee802154_lowpan.frags.high_thresh; + table[1].data = &net->ieee802154_lowpan.frags.low_thresh; + table[2].data = &net->ieee802154_lowpan.frags.timeout; + table[3].data = &net->ieee802154_lowpan.max_dsize; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + } + + hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); + if (hdr == NULL) + goto err_reg; + + net->ieee802154_lowpan.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct ctl_table_header *lowpan_ctl_header; + +static int lowpan_frags_sysctl_register(void) +{ + lowpan_ctl_header = register_net_sysctl(&init_net, + "net/ieee802154/6lowpan", + lowpan_frags_ctl_table); + return lowpan_ctl_header == NULL ? -ENOMEM : 0; +} + +static void lowpan_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(lowpan_ctl_header); +} +#else +static inline int lowpan_frags_ns_sysctl_register(struct net *net) +{ + return 0; +} + +static inline void lowpan_frags_ns_sysctl_unregister(struct net *net) +{ +} + +static inline int lowpan_frags_sysctl_register(void) +{ + return 0; +} + +static inline void lowpan_frags_sysctl_unregister(void) +{ +} +#endif + +static int __net_init lowpan_frags_init_net(struct net *net) +{ + net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ieee802154_lowpan.max_dsize = 0xFFFF; + + inet_frags_init_net(&net->ieee802154_lowpan.frags); + + return lowpan_frags_ns_sysctl_register(net); +} + +static void __net_exit lowpan_frags_exit_net(struct net *net) +{ + lowpan_frags_ns_sysctl_unregister(net); + inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags); +} + +static struct pernet_operations lowpan_frags_ops = { + .init = lowpan_frags_init_net, + .exit = lowpan_frags_exit_net, +}; + +int __init lowpan_net_frag_init(void) +{ + int ret; + + ret = lowpan_frags_sysctl_register(); + if (ret) + return ret; + + ret = register_pernet_subsys(&lowpan_frags_ops); + if (ret) + goto err_pernet; + + lowpan_frags.hashfn = lowpan_hashfn; + lowpan_frags.constructor = lowpan_frag_init; + lowpan_frags.destructor = NULL; + lowpan_frags.skb_free = NULL; + lowpan_frags.qsize = sizeof(struct frag_queue); + lowpan_frags.match = lowpan_frag_match; + lowpan_frags.frag_expire = lowpan_frag_expire; + lowpan_frags.secret_interval = 10 * 60 * HZ; + inet_frags_init(&lowpan_frags); + + return ret; +err_pernet: + lowpan_frags_sysctl_unregister(); + return ret; +} + +void lowpan_net_frag_exit(void) +{ + inet_frags_fini(&lowpan_frags); + lowpan_frags_sysctl_unregister(); + unregister_pernet_subsys(&lowpan_frags_ops); +} diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h new file mode 100644 index 00000000000..74e4a7c9819 --- /dev/null +++ b/net/ieee802154/reassembly.h @@ -0,0 +1,41 @@ +#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ +#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ + +#include <net/inet_frag.h> + +struct lowpan_create_arg { + __be16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + __be16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 4dd37615a74..8d6f6704da8 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -44,9 +44,7 @@ static DEVICE_ATTR_RO(name); MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); -MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", - ((signed char) (phy->transmit_power << 2)) >> 2, - (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1); +MASTER_SHOW(transmit_power, "%d +- 1 dB"); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f8c49ce5b28..f032688d20d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -55,4 +55,4 @@ obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ - xfrm4_output.o + xfrm4_output.o xfrm4_protocol.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ecd2c3f245c..8c54870db79 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - /* Note : following gso_segment() might change skb->encapsulation */ - udpfrag = !skb->encapsulation && proto == IPPROTO_UDP; + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) @@ -1502,9 +1505,9 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) bhptr = per_cpu_ptr(mib[0], cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { - start = u64_stats_fetch_begin_bh(syncp); + start = u64_stats_fetch_begin_irq(syncp); v = *(((u64 *) bhptr) + offt); - } while (u64_stats_fetch_retry_bh(syncp, start)); + } while (u64_stats_fetch_retry_irq(syncp, start)); res += v; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 717902669d2..a2afa89513a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph, *top_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; ahp = x->data; ahash = ahp->ahash; @@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah = ip_auth_hdr(skb); ihl = ip_hdrlen(skb); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } err = -ENOMEM; - iph = ah_alloc_tmp(ahash, nfrags, ihl); + iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len); if (!iph) goto out; - - icv = ah_tmp_icv(ahash, iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; memset(ah->auth_data, 0, ahp->icv_trunc_len); @@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); - ahash_request_set_crypt(req, sg, icv, skb->len); + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(seqhisg, seqhi, seqhi_len); + } + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah_output_done, skb); AH_SKB_CB(skb)->tmp = iph; @@ -295,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) struct ip_auth_hdr *ah; struct ah_data *ahp; int err = -ENOMEM; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; if (!pskb_may_pull(skb, sizeof(*ah))) goto out; @@ -335,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) iph = ip_hdr(skb); ihl = ip_hdrlen(skb); - work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } + + work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + + ahp->icv_trunc_len + seqhi_len); if (!work_iph) goto out; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; memcpy(work_iph, iph, ihl); memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); @@ -361,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, ihl); - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); - ahash_request_set_crypt(req, sg, icv, skb->len); + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(seqhisg, seqhi, seqhi_len); + } + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah_input_done, skb); AH_SKB_CB(skb)->tmp = work_iph; @@ -397,7 +428,7 @@ out: return err; } -static void ah4_err(struct sk_buff *skb, u32 info) +static int ah4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); const struct iphdr *iph = (const struct iphdr *)skb->data; @@ -407,23 +438,25 @@ static void ah4_err(struct sk_buff *skb, u32 info) switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) - return; + return 0; case ICMP_REDIRECT: break; default: - return; + return 0; } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) - return; + return 0; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); + + return 0; } static int ah_init_state(struct xfrm_state *x) @@ -505,6 +538,10 @@ static void ah_destroy(struct xfrm_state *x) kfree(ahp); } +static int ah4_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} static const struct xfrm_type ah_type = { @@ -518,11 +555,12 @@ static const struct xfrm_type ah_type = .output = ah_output }; -static const struct net_protocol ah4_protocol = { +static struct xfrm4_protocol ah4_protocol = { .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = ah4_rcv_cb, .err_handler = ah4_err, - .no_policy = 1, - .netns_ok = 1, + .priority = 0, }; static int __init ah4_init(void) @@ -531,7 +569,7 @@ static int __init ah4_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) { + if (xfrm4_protocol_register(&ah4_protocol, IPPROTO_AH) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah_type, AF_INET); return -EAGAIN; @@ -541,7 +579,7 @@ static int __init ah4_init(void) static void __exit ah4_fini(void) { - if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0) + if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah_type, AF_INET) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ac2dff3c2c1..bdbf68bb2e2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ - + nla_total_size(4); /* IFA_FLAGS */ + + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } static inline u32 cstamp_delta(unsigned long cstamp) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 7785b28061a..360b565918c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -473,7 +473,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) net_adj) & ~(blksize - 1)) + net_adj - 2; } -static void esp4_err(struct sk_buff *skb, u32 info) +static int esp4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); const struct iphdr *iph = (const struct iphdr *)skb->data; @@ -483,23 +483,25 @@ static void esp4_err(struct sk_buff *skb, u32 info) switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) - return; + return 0; case ICMP_REDIRECT: break; default: - return; + return 0; } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) - return; + return 0; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); + + return 0; } static void esp_destroy(struct xfrm_state *x) @@ -672,6 +674,11 @@ error: return err; } +static int esp4_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type esp_type = { .description = "ESP4", @@ -685,11 +692,12 @@ static const struct xfrm_type esp_type = .output = esp_output }; -static const struct net_protocol esp4_protocol = { +static struct xfrm4_protocol esp4_protocol = { .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = esp4_rcv_cb, .err_handler = esp4_err, - .no_policy = 1, - .netns_ok = 1, + .priority = 0, }; static int __init esp4_init(void) @@ -698,7 +706,7 @@ static int __init esp4_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) { + if (xfrm4_protocol_register(&esp4_protocol, IPPROTO_ESP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp_type, AF_INET); return -EAGAIN; @@ -708,7 +716,7 @@ static int __init esp4_init(void) static void __exit esp4_fini(void) { - if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0) + if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp_type, AF_INET) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7539e22868..255aa9946fe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = oif; + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; @@ -659,7 +659,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) - return ip_rt_dump(skb, cb); + return skb->len; s_h = cb->args[0]; s_e = cb->args[1]; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b53f0bf84dc..8a043f03c88 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -631,6 +631,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, .flowi4_oif = nh->nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 1863422fb7d..250be7421ab 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -182,6 +182,14 @@ static int gre_cisco_rcv(struct sk_buff *skb) int i; bool csum_err = false; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + } +#endif + if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bb075fc9a14..3b01959bf4b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) } work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0) { + while (work > 0 || force) { spin_lock(&nf->lru_lock); if (list_empty(&nf->lru_list)) { @@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); + inet_frag_lru_add(nf, qp); spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_add(nf, qp); + return qp; } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e9f1217a8af..be8abe73bb9 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -39,6 +39,71 @@ #include <net/route.h> #include <net/xfrm.h> +static bool ip_may_fragment(const struct sk_buff *skb) +{ + return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || + !skb->local_df; +} + +static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu || skb->local_df) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + +static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) +{ + unsigned int mtu; + + if (skb->local_df || !skb_is_gso(skb)) + return false; + + mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); + + /* if seglen > mtu, do software segmentation for IP fragmentation on + * output. DF bit cannot be set since ip_forward would have sent + * icmp error. + */ + return skb_gso_network_seglen(skb) > mtu; +} + +/* called if GSO skb needs to be fragmented on forward */ +static int ip_forward_finish_gso(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + features = netif_skb_dev_features(skb, dst->dev); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = dst_output(segs); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_forward_finish(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + if (ip_gso_exceeds_dst_mtu(skb)) + return ip_forward_finish_gso(skb); + return dst_output(skb); } @@ -59,6 +127,10 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); + /* that should never happen */ + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -68,9 +140,6 @@ int ip_forward(struct sk_buff *skb) if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; - if (skb->pkt_type != PACKET_HOST) - goto drop; - skb_forward_csum(skb); /* @@ -91,8 +160,7 @@ int ip_forward(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); - if (unlikely(skb->len > mtu && !skb_is_gso(skb) && - (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { + if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ec4f762efda..94213c89156 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -463,6 +463,7 @@ static const struct net_device_ops ipgre_netdev_ops = { static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; + dev->type = ARPHRD_IPGRE; ip_tunnel_setup(dev, ipgre_net_id); } @@ -501,7 +502,6 @@ static int ipgre_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); - dev->type = ARPHRD_IPGRE; dev->flags = IFF_NOARP; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->addr_len = 4; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8971780aec7..1cbeba5edff 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb) skb_dst(skb)->dev, dst_output); } -int ip_local_out(struct sk_buff *skb) +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out); +EXPORT_SYMBOL_GPL(ip_local_out_sk); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -226,9 +226,8 @@ static int ip_finish_output(struct sk_buff *skb) return ip_finish_output2(skb); } -int ip_mc_output(struct sk_buff *skb) +int ip_mc_output(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -287,7 +286,7 @@ int ip_mc_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sk_buff *skb) +int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; @@ -315,9 +314,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) sizeof(fl4->saddr) + sizeof(fl4->daddr)); } -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) +/* Note: skb->sk can be different from sk, in case of tunnels */ +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -389,6 +388,7 @@ packet_routed: ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) to->ipvs_property = from->ipvs_property; #endif @@ -449,7 +446,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; - bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; dev = rt->dst.dev; @@ -459,7 +455,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); - mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding); + mtu = ip_skb_dst_mtu(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { @@ -825,8 +821,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1149,8 +1144,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1311,8 +1305,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet->pmtudisc < IP_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip_sk_local_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 580dd96666e..64741b93863 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -186,7 +186,8 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } EXPORT_SYMBOL(ip_cmsg_recv); -int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) +int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, + bool allow_ipv6) { int err, val; struct cmsghdr *cmsg; @@ -194,6 +195,22 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; +#if defined(CONFIG_IPV6) + if (allow_ipv6 && + cmsg->cmsg_level == SOL_IPV6 && + cmsg->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *src_info; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info))) + return -EINVAL; + src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); + if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) + return -EINVAL; + ipc->oif = src_info->ipi6_ifindex; + ipc->addr = src_info->ipi6_addr.s6_addr32[3]; + continue; + } +#endif if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { @@ -626,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->nodefrag = val ? 1 : 0; break; case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) goto e_inval; inet->pmtudisc = val; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd28f386bd0..fa5b7519765 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -93,83 +93,32 @@ static void tunnel_dst_reset(struct ip_tunnel *t) tunnel_dst_set(t, NULL); } -static void tunnel_dst_reset_all(struct ip_tunnel *t) +void ip_tunnel_dst_reset_all(struct ip_tunnel *t) { int i; for_each_possible_cpu(i) __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } +EXPORT_SYMBOL(ip_tunnel_dst_reset_all); -static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) { struct dst_entry *dst; rcu_read_lock(); dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); - if (dst) + if (dst) { + if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + rcu_read_unlock(); + tunnel_dst_reset(t); + return NULL; + } dst_hold(dst); - rcu_read_unlock(); - return dst; -} - -static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) -{ - struct dst_entry *dst = tunnel_dst_get(t); - - if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { - tunnel_dst_reset(t); - return NULL; - } - - return dst; -} - -/* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *tstats = - per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; } - - tot->multicast = dev->stats.multicast; - - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - tot->collisions = dev->stats.collisions; - - return tot; + rcu_read_unlock(); + return (struct rtable *)dst; } -EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, __be16 flags, __be32 key) @@ -286,13 +235,17 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, { unsigned int h; __be32 remote; + __be32 i_key = parms->i_key; if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) remote = parms->iph.daddr; else remote = 0; - h = ip_tunnel_hash(parms->i_key, remote); + if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) + i_key = 0; + + h = ip_tunnel_hash(i_key, remote); return &itn->tunnels[h]; } @@ -449,7 +402,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, fbt = netdev_priv(itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); if (IS_ERR(dev)) - return NULL; + return ERR_CAST(dev); dev->mtu = ip_tunnel_bind_dev(dev); @@ -467,9 +420,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } @@ -584,7 +534,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt = NULL; /* Route to the other host */ + struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; @@ -657,8 +607,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); - if (connected) - rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); @@ -721,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -766,26 +715,25 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } - tunnel_dst_reset_all(t); + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); } int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ip_tunnel *tunnel = netdev_priv(dev); - struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; + struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; - if (dev == itn->fb_tunnel_dev) + if (dev == itn->fb_tunnel_dev) { t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) - t = netdev_priv(dev); + if (t == NULL) + t = netdev_priv(dev); + } memcpy(p, &t->parms, sizeof(*p)); break; @@ -803,9 +751,13 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (!t && (cmd == SIOCADDTUNNEL)) + if (!t && (cmd == SIOCADDTUNNEL)) { t = ip_tunnel_create(net, itn, p); - + if (IS_ERR(t)) { + err = PTR_ERR(t); + break; + } + } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { @@ -832,8 +784,9 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) if (t) { err = 0; ip_tunnel_update(itn, t, dev, p, true); - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + } else { + err = -ENOENT; + } break; case SIOCDELTUNNEL: @@ -1048,19 +1001,13 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int i, err; + int err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipt_stats; - ipt_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipt_stats->syncp); - } - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); if (!tunnel->dst_cache) { free_percpu(dev->tstats); @@ -1095,7 +1042,7 @@ void ip_tunnel_uninit(struct net_device *dev) if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); - tunnel_dst_reset_all(tunnel); + ip_tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6156f4ef5e9..bcf206c7900 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -46,7 +46,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { @@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, iph->ttl = ttl; __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); - err = ip_local_out(skb); + err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; @@ -148,3 +148,49 @@ error: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 48eafae5176..afcee51b90e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -34,6 +34,7 @@ #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> +#include <linux/icmpv6.h> #include <net/sock.h> #include <net/ip.h> @@ -49,8 +50,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -/* We dont digest the packet therefore let the packet pass */ -static int vti_rcv(struct sk_buff *skb) +static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@ -60,79 +61,120 @@ static int vti_rcv(struct sk_buff *skb) tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel != NULL) { - struct pcpu_sw_netstats *tstats; - u32 oldmark = skb->mark; - int ret; - - - /* temporarily mark the skb with the tunnel o_key, to - * only match policies with this mark. - */ - skb->mark = be32_to_cpu(tunnel->parms.o_key); - ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); - skb->mark = oldmark; - if (!ret) - return -1; - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - secpath_reset(skb); - skb->dev = tunnel->dev; + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + skb->mark = be32_to_cpu(tunnel->parms.i_key); + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + +static int vti_rcv(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); +} + +static int vti_rcv_cb(struct sk_buff *skb, int err) +{ + unsigned short family; + struct net_device *dev; + struct pcpu_sw_netstats *tstats; + struct xfrm_state *x; + struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; + + if (!tunnel) return 1; + + dev = tunnel->dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; } - return -1; + x = xfrm_input_state(skb); + family = x->inner_mode->afinfo->family; + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + return -EPERM; + + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); + skb->dev = dev; + + tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; } -/* This function assumes it is being called from dev_queue_xmit() - * and that skb is filled properly by that function. - */ +static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src) +{ + xfrm_address_t *daddr = (xfrm_address_t *)&dst; + xfrm_address_t *saddr = (xfrm_address_t *)&src; -static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) + /* if there is no transform then this tunnel is not functional. + * Or if the xfrm is not mode tunnel. + */ + if (!x || x->props.mode != XFRM_MODE_TUNNEL || + x->props.family != AF_INET) + return false; + + if (!dst) + return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET); + + if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET)) + return false; + + return true; +} + +static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, + struct flowi *fl) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *tiph = &tunnel->parms.iph; - u8 tos; - struct rtable *rt; /* Route to the other host */ + struct ip_tunnel_parm *parms = &tunnel->parms; + struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ - struct iphdr *old_iph = ip_hdr(skb); - __be32 dst = tiph->daddr; - struct flowi4 fl4; int err; - if (skb->protocol != htons(ETH_P_IP)) - goto tx_error; - - tos = old_iph->tos; + if (!dst) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } - memset(&fl4, 0, sizeof(fl4)); - flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), - RT_SCOPE_UNIVERSE, - IPPROTO_IPIP, 0, - dst, tiph->saddr, 0, 0); - rt = ip_route_output_key(dev_net(dev), &fl4); - if (IS_ERR(rt)) { + dst_hold(dst); + dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - /* if there is no transform then this tunnel is not functional. - * Or if the xfrm is not mode tunnel. - */ - if (!rt->dst.xfrm || - rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { + + if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; - ip_rt_put(rt); + dst_release(dst); goto tx_error_icmp; } - tdev = rt->dst.dev; + + tdev = dst->dev; if (tdev == dev) { - ip_rt_put(rt); + dst_release(dst); dev->stats.collisions++; goto tx_error; } @@ -146,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tunnel->err_count = 0; } - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - nf_reset(skb); + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); + skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; err = dst_output(skb); @@ -166,6 +206,95 @@ tx_error: return NETDEV_TX_OK; } +/* This function assumes it is being called from dev_queue_xmit() + * and that skb is filled properly by that function. + */ +static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + + skb->mark = be32_to_cpu(tunnel->parms.o_key); + + switch (skb->protocol) { + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + break; + case htons(ETH_P_IPV6): + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + default: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + return vti_xmit(skb, dev, &fl); +} + +static int vti4_err(struct sk_buff *skb, u32 info) +{ + __be32 spi; + struct xfrm_state *x; + struct ip_tunnel *tunnel; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah ; + struct ip_comp_hdr *ipch; + struct net *net = dev_net(skb->dev); + const struct iphdr *iph = (const struct iphdr *)skb->data; + int protocol = iph->protocol; + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); + if (!tunnel) + return -1; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return 0; + case ICMP_REDIRECT: + break; + default: + return 0; + } + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET); + if (!x) + return 0; + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0); + else + ipv4_redirect(skb, net, 0, 0, protocol, 0); + xfrm_state_put(x); + + return 0; +} + static int vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -181,12 +310,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } + p.i_flags |= VTI_ISVTI; err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY | VTI_ISVTI; + p.i_flags |= GRE_KEY; p.o_flags |= GRE_KEY; } @@ -207,6 +337,7 @@ static const struct net_device_ops vti_netdev_ops = { static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; + dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } @@ -218,13 +349,11 @@ static int vti_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); - dev->type = ARPHRD_TUNNEL; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -241,9 +370,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; } -static struct xfrm_tunnel_notifier vti_handler __read_mostly = { +static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { + .handler = vti_rcv, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, +}; + +static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { .handler = vti_rcv, - .priority = 1, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, +}; + +static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { + .handler = vti_rcv, + .input_handler = vti_input, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 100, }; static int __net_init vti_init_net(struct net *net) @@ -287,6 +435,8 @@ static void vti_netlink_parms(struct nlattr *data[], if (!data) return; + parms->i_flags = VTI_ISVTI; + if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); @@ -382,10 +532,31 @@ static int __init vti_init(void) err = register_pernet_device(&vti_net_ops); if (err < 0) return err; - err = xfrm4_mode_tunnel_input_register(&vti_handler); + err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP); if (err < 0) { unregister_pernet_device(&vti_net_ops); pr_info("vti init: can't register tunnel\n"); + + return err; + } + + err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH); + if (err < 0) { + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti_net_ops); + pr_info("vti init: can't register tunnel\n"); + + return err; + } + + err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP); + if (err < 0) { + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti_net_ops); + pr_info("vti init: can't register tunnel\n"); + + return err; } err = rtnl_link_register(&vti_link_ops); @@ -395,7 +566,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_mode_tunnel_input_deregister(&vti_handler); + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); return err; } @@ -403,8 +576,13 @@ rtnl_link_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); - if (xfrm4_mode_tunnel_input_deregister(&vti_handler)) + if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP)) pr_info("vti close: can't deregister tunnel\n"); + if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH)) + pr_info("vti close: can't deregister tunnel\n"); + if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP)) + pr_info("vti close: can't deregister tunnel\n"); + unregister_pernet_device(&vti_net_ops); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 826be4cb482..c0855d50a3f 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -23,7 +23,7 @@ #include <net/protocol.h> #include <net/sock.h> -static void ipcomp4_err(struct sk_buff *skb, u32 info) +static int ipcomp4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); __be32 spi; @@ -34,24 +34,26 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) - return; + return 0; case ICMP_REDIRECT: break; default: - return; + return 0; } spi = htonl(ntohs(ipch->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) - return; + return 0; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); + + return 0; } /* We always hold one tunnel user reference to indicate a tunnel */ @@ -147,6 +149,11 @@ out: return err; } +static int ipcomp4_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, @@ -157,11 +164,12 @@ static const struct xfrm_type ipcomp_type = { .output = ipcomp_output }; -static const struct net_protocol ipcomp4_protocol = { +static struct xfrm4_protocol ipcomp4_protocol = { .handler = xfrm4_rcv, + .input_handler = xfrm_input, + .cb_handler = ipcomp4_rcv_cb, .err_handler = ipcomp4_err, - .no_policy = 1, - .netns_ok = 1, + .priority = 0, }; static int __init ipcomp4_init(void) @@ -170,7 +178,7 @@ static int __init ipcomp4_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) { + if (xfrm4_protocol_register(&ipcomp4_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp_type, AF_INET); return -EAGAIN; @@ -180,7 +188,7 @@ static int __init ipcomp4_init(void) static void __exit ipcomp4_fini(void) { - if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) + if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index efa1138fa52..b3e86ea7b71 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -273,7 +273,7 @@ static int __init ic_open_devs(void) msleep(1); - if time_before(jiffies, next_msg) + if (time_before(jiffies, next_msg)) continue; elapsed = jiffies_to_msecs(jiffies - start); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b9b3472975b..d84dc8d4c91 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -455,7 +455,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_iif = skb->skb_iif, + .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; @@ -2255,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc_cache *c, int cmd) + u32 portid, u32 seq, struct mfc_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2329,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, if (skb == NULL) goto errout; - err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2368,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2382,7 +2384,8 @@ next_entry: if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c3e0adea9c2..7ebd6e37875 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) - return PTR_ERR(dst);; + return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 81c6910cfa9..a26ce035e3f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4 packet transformations such as the source, destination address and source and destination ports. +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + default NFT_REJECT + tristate + config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c16be9d5842..90b82405331 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 59da7cde072..f95b6f93814 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1044,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 718dfbd30cb..99e810f8467 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1231,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0284a..4bfaedf9b34 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ipv4_is_multicast(iph->daddr)) { if (ipv4_is_zeronet(iph->saddr)) return ipv4_is_local_multicast(iph->daddr) ^ invert; - flow.flowi4_iif = 0; - } else { - flow.flowi4_iif = LOOPBACK_IFINDEX; } - + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_oif = 0; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9eea059dd62..574f7ebba0b 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); nated_port = 0; break; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d551e31b416..7c676671329 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct, map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); } else { /* DNAT replies */ - map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); + map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); } if (map.from == map.to) diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 00000000000..e79718a382f --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/nft_reject.h> + +void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2d11c094296..8210964a9f1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; kgid_t low, high; + int ret = 0; inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { kgid_t gid = group_info->blocks[i][j]; if (gid_lte(low, gid) && gid_lte(gid, high)) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); @@ -727,7 +734,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); if (err) return err; if (ipc.opt) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a6c8a80ec9d..ad737fad6d8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), + SNMP_MIB_ITEM("TCPFastOpenActiveFail", LINUX_MIB_TCPFASTOPENACTIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), @@ -280,6 +281,11 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), + SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV), + SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV), + SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), + SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS), + SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c04518f4850..a9dbe58bdfe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -524,7 +524,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); if (err) goto out; if (ipc.opt) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 25071b48921..db1e0da871f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -139,11 +139,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); -static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, - int how) -{ -} - static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) { WARN_ON(1); @@ -162,7 +157,6 @@ static struct dst_ops ipv4_dst_ops = { .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, - .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, @@ -194,7 +188,7 @@ const __u8 ip_tos2prio[16] = { EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); -#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) +#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) @@ -697,7 +691,6 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, out_unlock: spin_unlock_bh(&fnhe_lock); - return; } static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, @@ -1136,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb) dst_set_expires(&rt->dst, 0); } -static int ip_rt_bug(struct sk_buff *skb) +static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, @@ -1597,6 +1590,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); + RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1695,25 +1689,27 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res); - if (err != 0) + if (err != 0) { + if (!IN_DEV_FORWARD(in_dev)) + err = -EHOSTUNREACH; goto no_route; - - RT_CACHE_STAT_INC(in_slow_tot); + } if (res.type == RTN_BROADCAST) goto brd_input; if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - LOOPBACK_IFINDEX, - dev, in_dev, &itag); + 0, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; goto local_input; } - if (!IN_DEV_FORWARD(in_dev)) + if (!IN_DEV_FORWARD(in_dev)) { + err = -EHOSTUNREACH; goto no_route; + } if (res.type != RTN_UNICAST) goto martian_destination; @@ -1768,6 +1764,7 @@ local_input: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); + RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -2220,7 +2217,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; new->dev = ort->dst.dev; if (new->dev) @@ -2359,7 +2356,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, } } else #endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) goto nla_put_failure; } @@ -2470,11 +2467,6 @@ errout_free: goto errout; } -int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - return skb->len; -} - void ip_rt_multicast_event(struct in_device *in_dev) { rt_cache_flush(dev_net(in_dev->dev)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4475b3bb494..4bd6d52eeff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); int err, flags; @@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) if (unlikely(tp->fastopen_req == NULL)) return -ENOBUFS; tp->fastopen_req->data = msg; + tp->fastopen_req->size = size; flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); - *size = tp->fastopen_req->copied; + *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; } @@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flags = msg->msg_flags; if (flags & MSG_FASTOPEN) { - err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) @@ -2229,7 +2231,7 @@ adjudge_to_death: /* This is a (useful) BSD violating of the RFC. There is a * problem with TCP as specified in that the other end could * keep a socket open forever with no application left this end. - * We use a 3 minute timeout (about the same as BSD) then kill + * We use a 1 minute timeout (about the same as BSD) then kill * our end. If they send after that then tough - BUT: long enough * that we won't make the old 4*rto = almost no time - whoops * reset mistake. @@ -2339,7 +2341,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); - tp->srtt = 0; + tp->srtt_us = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; @@ -2783,8 +2785,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; - info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; - info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; + info->tcpi_rtt = tp->srtt_us >> 3; + info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; @@ -2794,6 +2796,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; + + info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ? + sk->sk_pacing_rate : ~0ULL; + info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? + sk->sk_max_pacing_rate : ~0ULL; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index ad37bf18ae4..2b9464c93b8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size && - left < sk->sk_gso_max_segs) + left < tp->xmit_size_goal_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } @@ -362,21 +361,12 @@ u32 tcp_reno_ssthresh(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); -/* Lower bound on congestion window with halving. */ -u32 tcp_reno_min_cwnd(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - return tp->snd_ssthresh/2; -} -EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); - struct tcp_congestion_ops tcp_reno = { .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, }; /* Initial congestion control used (until SYN) @@ -388,6 +378,5 @@ struct tcp_congestion_ops tcp_init_congestion_ops = { .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, }; EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 828e4c3ffba..8bf224516ba 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -476,10 +476,6 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - /* hystart needs ms clock resolution */ - if (hystart && HZ < 1000) - cubictcp.flags |= TCP_CONG_RTT_STAMP; - return tcp_register_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8ed9305dfdf..8b9e7bad77c 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -162,7 +162,6 @@ static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .cong_avoid = hstcp_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, .owner = THIS_MODULE, .name = "highspeed" diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 478fe82611b..a15a799bf76 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -21,7 +21,7 @@ struct hybla { u32 rho2; /* Rho * Rho, integer part */ u32 rho_3ls; /* Rho parameter, <<3 */ u32 rho2_7ls; /* Rho^2, <<7 */ - u32 minrtt; /* Minimum smoothed round trip time value seen */ + u32 minrtt_us; /* Minimum smoothed round trip time value seen */ }; /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */ @@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk) { struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, + tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC), + 8U); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >> 7; @@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk) hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; tp->snd_cwnd = ca->rho; } @@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ - if (tp->srtt < ca->minrtt){ + if (tp->srtt_us < ca->minrtt_us) { hybla_recalc_param(sk); - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; } if (!tcp_is_cwnd_limited(sk, in_flight)) @@ -166,7 +168,6 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, - .min_cwnd = tcp_reno_min_cwnd, .cong_avoid = hybla_cong_avoid, .set_state = hybla_state, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index e498a62b8f9..863d105e301 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -325,10 +325,8 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_illinois __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, - .min_cwnd = tcp_reno_min_cwnd, .cong_avoid = tcp_illinois_cong_avoid, .set_state = tcp_illinois_state, .get_info = tcp_illinois_info, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65cf90e063d..d6b46eb2f94 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -667,10 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) +static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) { struct tcp_sock *tp = tcp_sk(sk); - long m = mrtt; /* RTT */ + long m = mrtt_us; /* RTT */ + u32 srtt = tp->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -688,14 +689,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ - if (m == 0) - m = 1; - if (tp->srtt != 0) { - m -= (tp->srtt >> 3); /* m is now error in rtt est */ - tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain @@ -707,27 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) if (m > 0) m >>= 3; } else { - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ } - tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ - if (tp->mdev > tp->mdev_max) { - tp->mdev_max = tp->mdev; - if (tp->mdev_max > tp->rttvar) - tp->rttvar = tp->mdev_max; + tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (tp->mdev_us > tp->mdev_max_us) { + tp->mdev_max_us = tp->mdev_us; + if (tp->mdev_max_us > tp->rttvar_us) + tp->rttvar_us = tp->mdev_max_us; } if (after(tp->snd_una, tp->rtt_seq)) { - if (tp->mdev_max < tp->rttvar) - tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2; + if (tp->mdev_max_us < tp->rttvar_us) + tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = tcp_rto_min(sk); + tp->mdev_max_us = tcp_rto_min_us(sk); } } else { /* no previous measure. */ - tp->srtt = m << 3; /* take the measured time to be rtt */ - tp->mdev = m << 1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); + srtt = m << 3; /* take the measured time to be rtt */ + tp->mdev_us = m << 1; /* make sure rto = 3*rtt */ + tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); + tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; } + tp->srtt_us = max(1U, srtt); } /* Set the sk_pacing_rate to allow proper sizing of TSO packets. @@ -742,18 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk) u64 rate; /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ - rate = (u64)tp->mss_cache * 2 * (HZ << 3); + rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); rate *= max(tp->snd_cwnd, tp->packets_out); - /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), - * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) - * We probably need usec resolution in the future. - * Note: This also takes care of possible srtt=0 case, - * when tcp_rtt_estimator() was not yet called. - */ - if (tp->srtt > 8 + 2) - do_div(rate, tp->srtt); + if (likely(tp->srtt_us)) + do_div(rate, tp->srtt_us); /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate * without any lock. We want to make sure compiler wont store @@ -1120,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; - int flag; - s32 rtt; /* RTT measured by SACKing never-retransmitted data */ + int reord; + int fack_count; + long rtt_us; /* RTT measured by SACKing never-retransmitted data */ + int flag; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1184,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, - int dup_sack, int pcount, u32 xmit_time) + int dup_sack, int pcount, + const struct skb_mstamp *xmit_time) { struct tcp_sock *tp = tcp_sk(sk); int fack_count = state->fack_count; @@ -1225,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk, if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; /* Pick the earliest sequence sacked for RTT */ - if (state->rtt < 0) - state->rtt = tcp_time_stamp - xmit_time; + if (state->rtt_us < 0) { + struct skb_mstamp now; + + skb_mstamp_get(&now); + state->rtt_us = skb_mstamp_us_delta(&now, + xmit_time); + } } if (sacked & TCPCB_LOST) { @@ -1285,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1563,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1620,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, - u32 prior_snd_una, s32 *sack_rtt) + u32 prior_snd_una, long *sack_rtt_us) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + @@ -1638,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, state.flag = 0; state.reord = tp->packets_out; - state.rtt = -1; + state.rtt_us = -1L; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1822,7 +1823,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - *sack_rtt = state.rtt; + *sack_rtt_us = state.rtt_us; return state.flag; } @@ -1943,8 +1944,9 @@ void tcp_enter_loss(struct sock *sk, int how) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->undo_marker = 0; + TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2032,10 +2034,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * available, or RTO is scheduled to fire first. */ if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt) + (flag & FLAG_ECE) || !tp->srtt_us) return false; - delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); + delay = max(usecs_to_jiffies(tp->srtt_us >> 5), + msecs_to_jiffies(2)); + if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; @@ -2882,7 +2886,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - s32 seq_rtt, s32 sack_rtt) + long seq_rtt_us, long sack_rtt_us) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2892,10 +2896,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * is acked (RFC6298). */ if (flag & FLAG_RETRANS_DATA_ACKED) - seq_rtt = -1; + seq_rtt_us = -1L; - if (seq_rtt < 0) - seq_rtt = sack_rtt; + if (seq_rtt_us < 0) + seq_rtt_us = sack_rtt_us; /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment @@ -2903,14 +2907,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); - if (seq_rtt < 0) + if (seq_rtt_us < 0) return false; - tcp_rtt_estimator(sk, seq_rtt); + tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); /* RFC6298: only reset backoff on valid RTT measurement. */ @@ -2922,16 +2926,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); - s32 seq_rtt = -1; + long seq_rtt_us = -1L; if (synack_stamp && !tp->total_retrans) - seq_rtt = tcp_time_stamp - synack_stamp; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() */ - if (!tp->srtt) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (!tp->srtt_us) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) @@ -3020,26 +3024,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, s32 sack_rtt) + u32 prior_snd_una, long sack_rtt_us) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - u32 now = tcp_time_stamp; + struct skb_mstamp first_ackt, last_ackt, now; + struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + u32 reord = tp->packets_out; bool fully_acked = true; - int flag = 0; + long ca_seq_rtt_us = -1L; + long seq_rtt_us = -1L; + struct sk_buff *skb; u32 pkts_acked = 0; - u32 reord = tp->packets_out; - u32 prior_sacked = tp->sacked_out; - s32 seq_rtt = -1; - s32 ca_seq_rtt = -1; - ktime_t last_ackt = net_invalid_timestamp(); bool rtt_update; + int flag = 0; + + first_ackt.v64 = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - u32 acked_pcount; u8 sacked = scb->sacked; + u32 acked_pcount; /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { @@ -3061,11 +3066,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; } else { - ca_seq_rtt = now - scb->when; - last_ackt = skb->tstamp; - if (seq_rtt < 0) { - seq_rtt = ca_seq_rtt; - } + last_ackt = skb->skb_mstamp; + WARN_ON_ONCE(last_ackt.v64 == 0); + if (!first_ackt.v64) + first_ackt = last_ackt; + if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); if (!after(scb->end_seq, tp->high_seq)) @@ -3111,7 +3116,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); + skb_mstamp_get(&now); + if (first_ackt.v64) { + seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); + ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + } + + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops @@ -3139,25 +3150,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) { - s32 rtt_us = -1; - - /* Is the ACK triggering packet unambiguous? */ - if (!(flag & FLAG_RETRANS_DATA_ACKED)) { - /* High resolution needed and available? */ - if (ca_ops->flags & TCP_CONG_RTT_STAMP && - !ktime_equal(last_ackt, - net_invalid_timestamp())) - rtt_us = ktime_us_delta(ktime_get_real(), - last_ackt); - else if (ca_seq_rtt >= 0) - rtt_us = jiffies_to_usecs(ca_seq_rtt); - } + if (ca_ops->pkts_acked) + ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); - ca_ops->pkts_acked(sk, pkts_acked, rtt_us); - } - } else if (skb && rtt_update && sack_rtt >= 0 && - sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + } else if (skb && rtt_update && sack_rtt_us >= 0 && + sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3367,12 +3364,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; + u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ - s32 sack_rtt = -1; + long sack_rtt_us = -1L; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3430,7 +3427,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; @@ -3449,7 +3446,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ acked = tp->packets_out; - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, + sack_rtt_us); acked -= tp->packets_out; /* Advance cwnd if state allows */ @@ -3472,8 +3470,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) - tcp_update_pacing_rate(sk); + tcp_update_pacing_rate(sk); return 1; no_queue: @@ -3502,7 +3499,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } @@ -4416,7 +4413,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return; } @@ -4917,7 +4914,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t BUG(); tp->urg_data = TCP_URG_VALID | tmp; if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } } } @@ -5003,11 +5000,11 @@ static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) || (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { tp->ucopy.wakeup = 1; - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } } else if (chunk > 0) { tp->ucopy.wakeup = 1; - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } out: return copied_early; @@ -5278,7 +5275,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return; } } @@ -5398,9 +5395,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, break; } tcp_rearm_rto(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; } tp->syn_data_acked = tp->syn_data; + if (tp->syn_data_acked) + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); return false; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf97651049..438f3b95143 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); @@ -854,8 +854,10 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { int res = tcp_v4_send_synack(sk, NULL, req, 0); - if (!res) + if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } return res; } @@ -878,8 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk, bool want_cookie = false; struct listen_sock *lopt; - - #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { msg = "Sending cookies"; @@ -1434,7 +1434,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; tp->syn_data_acked = 1; } - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); WARN_ON(req->sk == NULL); @@ -2628,7 +2628,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 991d62a2f9b..c9aecae3132 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -315,11 +315,9 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) } static struct tcp_congestion_ops tcp_lp __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_lp_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, .pkts_acked = tcp_lp_pkts_acked, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index f7e522c558b..d4f015ad6c8 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -103,7 +103,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) } static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); unsigned long long val; @@ -219,7 +219,7 @@ static struct cftype tcp_files[] = { static int __init tcp_memcontrol_init(void) { - WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); + WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); return 0; } __initcall(tcp_memcontrol_init); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d547075d830..dcaf72f1021 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -33,6 +33,11 @@ struct tcp_fastopen_metrics { struct tcp_fastopen_cookie cookie; }; +/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility + * Kernel only stores RTT and RTTVAR in usec resolution + */ +#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2) + struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_saddr; @@ -41,7 +46,7 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX + 1]; + u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; struct rcu_head rcu_head; @@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm, return tm->tcpm_vals[idx]; } -static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, - enum tcp_metric_index idx) -{ - return msecs_to_jiffies(tm->tcpm_vals[idx]); -} - static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) @@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, tm->tcpm_vals[idx] = val; } -static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, - enum tcp_metric_index idx, - u32 val) -{ - tm->tcpm_vals[idx] = jiffies_to_msecs(val); -} - static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { @@ -101,9 +93,11 @@ struct tcpm_hash_bucket { static DEFINE_SPINLOCK(tcp_metrics_lock); -static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, +static void tcpm_suck_dst(struct tcp_metrics_block *tm, + const struct dst_entry *dst, bool fastopen_clear) { + u32 msval; u32 val; tm->tcpm_stamp = jiffies; @@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, val |= 1 << TCP_METRIC_REORDERING; tm->tcpm_lock = val; - tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); + msval = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + + msval = dst_metric_raw(dst, RTAX_RTTVAR); + tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); @@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); rcu_read_lock(); - if (icsk->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. Reset our * results. @@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk) if (!tm) goto out_unlock; - rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); - m = rtt - tp->srtt; + rtt = tcp_metric_get(tm, TCP_METRIC_RTT); + m = rtt - tp->srtt_us; /* If newly calculated rtt larger than stored one, store new * one. Otherwise, use EWMA. Remember, rtt overestimation is @@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk) */ if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { if (m <= 0) - rtt = tp->srtt; + rtt = tp->srtt_us; else rtt -= (m >> 3); - tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); + tcp_metric_set(tm, TCP_METRIC_RTT, rtt); } if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { @@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk) /* Scale deviation to rttvar fixed point */ m >>= 1; - if (m < tp->mdev) - m = tp->mdev; + if (m < tp->mdev_us) + m = tp->mdev_us; - var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + var = tcp_metric_get(tm, TCP_METRIC_RTTVAR); if (m >= var) var = m; else var -= (var - m) >> 2; - tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, var); } if (tcp_in_initial_slowstart(tp)) { @@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk) tp->reordering = val; } - crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); + crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); reset: /* The initial RTT measurement from the SYN/SYN-ACK is not ideal @@ -551,18 +548,20 @@ reset: * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (crtt > tp->srtt) { + if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ - crtt >>= 3; + crtt /= 8 * USEC_PER_MSEC; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); - } else if (tp->srtt == 0) { + } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from * 3WHS. This is most likely due to retransmission, * including spurious one. Reset the RTO back to 3secs * from the more aggressive 1sec to avoid more spurious * retransmission. */ - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); + tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been @@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); if (!nest) goto nla_put_failure; - for (i = 0; i < TCP_METRIC_MAX + 1; i++) { - if (!tm->tcpm_vals[i]) + for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { + u32 val = tm->tcpm_vals[i]; + + if (!val) continue; - if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + if (i == TCP_METRIC_RTT) { + if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (i == TCP_METRIC_RTTVAR) { + if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (nla_put_u32(msg, i + 1, val) < 0) goto nla_put_failure; n++; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7a436c517e4..05c1b155251 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt = 0; - newtp->mdev = TCP_TIMEOUT_INIT; + newtp->srtt_us = 0; + newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; @@ -745,7 +745,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, skb->len); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 03d26b85eab..025e2509398 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -86,6 +86,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { tcp_rearm_rto(sk); } + + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, + tcp_skb_pcount(skb)); } /* SND.NXT, if window was not shrunk. @@ -269,6 +272,7 @@ EXPORT_SYMBOL(tcp_select_initial_window); static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 old_win = tp->rcv_wnd; u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); @@ -281,6 +285,9 @@ static u16 tcp_select_window(struct sock *sk) * * Relax Will Robinson. */ + if (new_win == 0) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPWANTZEROWINDOWADV); new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); } tp->rcv_wnd = new_win; @@ -298,8 +305,14 @@ static u16 tcp_select_window(struct sock *sk) new_win >>= tp->rx_opt.rcv_wscale; /* If we advertise zero window, disable fast path. */ - if (new_win == 0) + if (new_win == 0) { tp->pred_flags = 0; + if (old_win) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPTOZEROWINDOWADV); + } else if (old_win == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); + } return new_win; } @@ -698,7 +711,8 @@ static void tcp_tsq_handler(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); + tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + 0, GFP_ATOMIC); } /* * One tasklet per cpu tries to send more skbs. @@ -766,6 +780,17 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); + /* Here begins the tricky part : + * We are called from release_sock() with : + * 1) BH disabled + * 2) sk_lock.slock spinlock held + * 3) socket owned by us (sk->sk_lock.owned == 1) + * + * But following code is meant to be called from BH handlers, + * so we should keep BH disabled, but early release socket ownership + */ + sock_release_ownership(sk); + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); __sock_put(sk); @@ -855,16 +880,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (clone_it) { const struct sk_buff *fclone = skb + 1; - /* If congestion control is doing timestamping, we must - * take such a timestamp before we potentially clone/copy. - */ - if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) - __net_timestamp(skb); + skb_mstamp_get(&skb->skb_mstamp); if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -872,6 +893,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb = skb_clone(skb, gfp_mask); if (unlikely(!skb)) return -ENOBUFS; + /* Our usage of tstamp should remain private */ + skb->tstamp.tv64 = 0; } inet = inet_sk(sk); @@ -958,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (likely(err <= 0)) return err; @@ -1414,7 +1437,7 @@ static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, * With Minshall's modification: all sent small packets are ACKed. */ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, - unsigned int mss_now, int nonagle) + int nonagle) { return partial && ((nonagle & TCP_NAGLE_CORK) || @@ -1446,7 +1469,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, * to include this last segment in this skb. * Otherwise, we'll split the skb at last MSS boundary */ - if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle)) + if (tcp_nagle_check(partial != 0, tp, nonagle)) return needed - partial; return needed; @@ -1509,7 +1532,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; - if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle)) + if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle)) return true; return false; @@ -1904,7 +1927,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); - break; + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + * We abuse smp_mb__after_clear_bit() because + * there is no smp_mb__after_set_bit() yet + */ + smp_mb__after_clear_bit(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + break; } limit = mss_now; @@ -1955,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, tlp_time_stamp, rto_time_stamp; - u32 rtt = tp->srtt >> 3; + u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) return false; @@ -1977,7 +2008,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; @@ -2062,7 +2093,6 @@ rearm_timer: if (likely(!err)) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); - return; } /* Push out any pending frames which were held back due to @@ -2160,7 +2190,8 @@ u32 __tcp_select_window(struct sock *sk) */ int mss = icsk->icsk_ack.rcv_mss; int free_space = tcp_space(sk); - int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); + int allowed_space = tcp_full_space(sk); + int full_space = min_t(int, tp->window_clamp, allowed_space); int window; if (mss > full_space) @@ -2173,7 +2204,19 @@ u32 __tcp_select_window(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); - if (free_space < mss) + /* free_space might become our new window, make sure we don't + * increase it due to wscale. + */ + free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); + + /* if free space is less than mss estimate, or is below 1/16th + * of the maximum allowed, try to move to zero-window, else + * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and + * new incoming data is dropped due to memory limits. + * With large window, mss test triggers way too late in order + * to announce zero window in time before rmem limit kicks in. + */ + if (free_space < (allowed_space >> 4) || free_space < mss) return 0; } @@ -2328,6 +2371,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); unsigned int cur_mss; + int err; /* Inconslusive MTU probe */ if (icsk->icsk_mtup.probe_size) { @@ -2391,11 +2435,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; } else { - return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + + if (likely(!err)) + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + return err; } int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) @@ -2406,7 +2454,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (err == 0) { /* Update global TCP statistics. */ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); tp->total_retrans++; #if FASTRETRANS_DEBUG > 0 @@ -2692,7 +2741,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2762,7 +2811,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); - TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ @@ -2899,7 +2948,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; - syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + space = min_t(size_t, space, fo->size); + + /* limit to order-0 allocations */ + space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); + + syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, sk->sk_allocation); if (syn_data == NULL) goto fallback; @@ -2929,9 +2983,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) tcp_connect_queue_skb(sk, data); fo->copied = data->len; + /* syn_data is about to be sent, we need to take current time stamps + * for the packets that are in write queue : SYN packet and DATA + */ + skb_mstamp_get(&syn->skb_mstamp); + data->skb_mstamp = syn->skb_mstamp; + if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { tp->syn_data = (fo->copied > 0); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); goto done; } syn_data = NULL; @@ -3019,8 +3079,9 @@ void tcp_send_delayed_ack(struct sock *sk) * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ - if (tp->srtt) { - int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN); + if (tp->srtt_us) { + int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), + TCP_DELACK_MIN); if (rtt < max_ato) max_ato = rtt; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 1f2d37613c9..3b66610d415 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_wnd = tp->snd_wnd; p->rcv_wnd = tp->rcv_wnd; p->ssthresh = tcp_current_ssthresh(sk); - p->srtt = tp->srtt >> 3; + p->srtt = tp->srtt_us >> 3; tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 19ea6c2951f..0ac50836da4 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -39,7 +39,6 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, .owner = THIS_MODULE, .name = "scalable", diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 64f0354c84c..286227abed1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -165,6 +165,9 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true); + if (tp->syn_data) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); } retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; syn_set = true; diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 06cae62bf20..48539fff635 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -306,11 +306,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 326475a9486..1b8e28fcd7e 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk) } static struct tcp_congestion_ops tcp_veno __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, .cong_avoid = tcp_veno_cong_avoid, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 76a1e23259e..b94a04ae2ed 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -276,7 +276,6 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, - .min_cwnd = tcp_westwood_bw_rttmin, .cwnd_event = tcp_westwood_event, .get_info = tcp_westwood_info, .pkts_acked = tcp_westwood_pkts_acked, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 1a8d271f994..5ede0e72794 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -227,11 +227,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { } static struct tcp_congestion_ops tcp_yeah __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .cong_avoid = tcp_yeah_cong_avoid, - .min_cwnd = tcp_reno_min_cwnd, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 77bd16fa9f3..4468e1adc09 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -931,7 +931,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc, + sk->sk_family == AF_INET6); if (err) return err; if (ipc.opt) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 25f5cee3a08..88b4023ecfc 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -17,6 +17,8 @@ static DEFINE_SPINLOCK(udp_offload_lock); static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; +#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) + struct udp_offload_priv { struct udp_offload *offload; struct rcu_head rcu; @@ -100,8 +102,7 @@ out: int udp_add_offload(struct udp_offload *uo) { - struct udp_offload_priv __rcu **head = &udp_offload_base; - struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL); + struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); if (!new_offload) return -ENOMEM; @@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo) new_offload->offload = uo; spin_lock(&udp_offload_lock); - rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); - rcu_assign_pointer(*head, new_offload); + new_offload->next = udp_offload_base; + rcu_assign_pointer(udp_offload_base, new_offload); spin_unlock(&udp_offload_lock); return 0; @@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo) spin_lock(&udp_offload_lock); - uo_priv = rcu_dereference(*head); + uo_priv = udp_deref_protected(*head); for (; uo_priv != NULL; - uo_priv = rcu_dereference(*head)) { - + uo_priv = udp_deref_protected(*head)) { if (uo_priv->offload == uo) { - rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); + rcu_assign_pointer(*head, + udp_deref_protected(uo_priv->next)); goto unlock; } head = &uo_priv->next; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 1f12c8b4586..aac6197b7a7 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -37,15 +37,6 @@ drop: return NET_RX_DROP; } -int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) -{ - XFRM_SPI_SKB_CB(skb)->family = AF_INET; - XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - return xfrm_input(skb, nexthdr, spi, encap_type); -} -EXPORT_SYMBOL(xfrm4_rcv_encap); - int xfrm4_transport_finish(struct sk_buff *skb, int async) { struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 31b18152528..05f2b484954 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -15,65 +15,6 @@ #include <net/ip.h> #include <net/xfrm.h> -/* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; -static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); - -int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -EEXIST; - int priority = handler->priority; - - mutex_lock(&xfrm4_mode_tunnel_input_mutex); - - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t->priority > priority) - break; - if (t->priority == priority) - goto err; - - } - - handler->next = *pprev; - rcu_assign_pointer(*pprev, handler); - - ret = 0; - -err: - mutex_unlock(&xfrm4_mode_tunnel_input_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); - -int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -ENOENT; - - mutex_lock(&xfrm4_mode_tunnel_input_mutex); - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t == handler) { - *pprev = handler->next; - ret = 0; - break; - } - } - mutex_unlock(&xfrm4_mode_tunnel_input_mutex); - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister); - static inline void ipip_ecn_decapsulate(struct sk_buff *skb) { struct iphdr *inner_iph = ipip_hdr(skb); @@ -127,14 +68,8 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -#define for_each_input_rcu(head, handler) \ - for (handler = rcu_dereference(head); \ - handler != NULL; \ - handler = rcu_dereference(handler->next)) - static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -143,9 +78,6 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; - for_each_input_rcu(rcv_notify_handlers, handler) - handler->handler(skb); - err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731f..40e701f2e1e 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb) return xfrm_output(skb); } -int xfrm4_output(struct sk_buff *skb) +int xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e1a63930a96..6156f68a1e9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -325,6 +325,7 @@ void __init xfrm4_init(void) xfrm4_state_init(); xfrm4_policy_init(); + xfrm4_protocol_init(); #ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm4_net_ops); #endif diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c new file mode 100644 index 00000000000..7f7b243e813 --- /dev/null +++ b/net/ipv4/xfrm4_protocol.c @@ -0,0 +1,286 @@ +/* xfrm4_protocol.c - Generic xfrm protocol multiplexer. + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv4/tunnel4.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/xfrm.h> + +static struct xfrm4_protocol __rcu *esp4_handlers __read_mostly; +static struct xfrm4_protocol __rcu *ah4_handlers __read_mostly; +static struct xfrm4_protocol __rcu *ipcomp4_handlers __read_mostly; +static DEFINE_MUTEX(xfrm4_protocol_mutex); + +static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp4_handlers; + case IPPROTO_AH: + return &ah4_handlers; + case IPPROTO_COMP: + return &ipcomp4_handlers; + } + + return NULL; +} + +#define for_each_protocol_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) \ + +int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +{ + int ret; + struct xfrm4_protocol *handler; + + for_each_protocol_rcu(*proto_handlers(protocol), handler) + if ((ret = handler->cb_handler(skb, err)) <= 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(xfrm4_rcv_cb); + +int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + int ret; + struct xfrm4_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + for_each_protocol_rcu(*proto_handlers(nexthdr), handler) + if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) + return ret; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(xfrm4_rcv_encap); + +static int xfrm4_esp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm4_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + + for_each_protocol_rcu(esp4_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm4_esp_err(struct sk_buff *skb, u32 info) +{ + struct xfrm4_protocol *handler; + + for_each_protocol_rcu(esp4_handlers, handler) + if (!handler->err_handler(skb, info)) + break; +} + +static int xfrm4_ah_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm4_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + + for_each_protocol_rcu(ah4_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret;; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm4_ah_err(struct sk_buff *skb, u32 info) +{ + struct xfrm4_protocol *handler; + + for_each_protocol_rcu(ah4_handlers, handler) + if (!handler->err_handler(skb, info)) + break; +} + +static int xfrm4_ipcomp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm4_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + + for_each_protocol_rcu(ipcomp4_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info) +{ + struct xfrm4_protocol *handler; + + for_each_protocol_rcu(ipcomp4_handlers, handler) + if (!handler->err_handler(skb, info)) + break; +} + +static const struct net_protocol esp4_protocol = { + .handler = xfrm4_esp_rcv, + .err_handler = xfrm4_esp_err, + .no_policy = 1, + .netns_ok = 1, +}; + +static const struct net_protocol ah4_protocol = { + .handler = xfrm4_ah_rcv, + .err_handler = xfrm4_ah_err, + .no_policy = 1, + .netns_ok = 1, +}; + +static const struct net_protocol ipcomp4_protocol = { + .handler = xfrm4_ipcomp_rcv, + .err_handler = xfrm4_ipcomp_err, + .no_policy = 1, + .netns_ok = 1, +}; + +static struct xfrm_input_afinfo xfrm4_input_afinfo = { + .family = AF_INET, + .owner = THIS_MODULE, + .callback = xfrm4_rcv_cb, +}; + +static inline const struct net_protocol *netproto(unsigned char protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp4_protocol; + case IPPROTO_AH: + return &ah4_protocol; + case IPPROTO_COMP: + return &ipcomp4_protocol; + } + + return NULL; +} + +int xfrm4_protocol_register(struct xfrm4_protocol *handler, + unsigned char protocol) +{ + struct xfrm4_protocol __rcu **pprev; + struct xfrm4_protocol *t; + bool add_netproto = false; + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm4_protocol_mutex); + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm4_protocol_mutex))) + add_netproto = true; + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t->priority < priority) + break; + if (t->priority == priority) + goto err; + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm4_protocol_mutex); + + if (add_netproto) { + if (inet_add_protocol(netproto(protocol), protocol)) { + pr_err("%s: can't add protocol\n", __func__); + ret = -EAGAIN; + } + } + + return ret; +} +EXPORT_SYMBOL(xfrm4_protocol_register); + +int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, + unsigned char protocol) +{ + struct xfrm4_protocol __rcu **pprev; + struct xfrm4_protocol *t; + int ret = -ENOENT; + + mutex_lock(&xfrm4_protocol_mutex); + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm4_protocol_mutex))) { + if (inet_del_protocol(netproto(protocol), protocol) < 0) { + pr_err("%s: can't remove protocol\n", __func__); + ret = -EAGAIN; + } + } + + mutex_unlock(&xfrm4_protocol_mutex); + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(xfrm4_protocol_deregister); + +void __init xfrm4_protocol_init(void) +{ + xfrm_input_register_afinfo(&xfrm4_input_afinfo); +} +EXPORT_SYMBOL(xfrm4_protocol_init); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index d92e5586783..438a73aa777 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL + select NET_IP_TUNNEL depends on INET6_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 17bb830872d..2fe68364bb2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ - xfrm6_output.o + xfrm6_output.o xfrm6_protocol.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ad235690684..6c7fa0853fc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) __in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; unsigned long expires; + ASSERT_RTNL(); + spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1103,8 +1107,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; @@ -1601,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1622,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1668,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. */ @@ -1677,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1688,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1698,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1709,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -2268,11 +2291,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2323,7 +2348,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2472,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -2783,6 +2808,8 @@ static void addrconf_gre_config(struct net_device *dev) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); + else + addrconf_prefix_route(&addr, 64, dev, 0, 0); } #endif @@ -3006,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3044,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3143,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3198,25 +3225,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) +{ + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3227,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3235,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); } /* ifp->idev must be at least read locked */ @@ -3271,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. @@ -3512,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -3623,13 +3694,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3686,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, bool was_managetempaddr; bool had_prefixroute; + ASSERT_RTNL(); + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3751,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4381,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; + ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4429,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4631,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -5239,6 +5326,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5269,7 +5362,7 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); rtnl_af_register(&inet6_ops); @@ -5297,6 +5390,8 @@ errout: rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5332,7 +5427,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b30ad3741b4..731e1e1722d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -6,7 +6,7 @@ */ /* * Author: - * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> */ #include <linux/kernel.h> @@ -22,14 +22,13 @@ #if 0 #define ADDRLABEL(x...) printk(x) #else -#define ADDRLABEL(x...) do { ; } while(0) +#define ADDRLABEL(x...) do { ; } while (0) #endif /* * Policy Table */ -struct ip6addrlbl_entry -{ +struct ip6addrlbl_entry { #ifdef CONFIG_NET_NS struct net *lbl_net; #endif @@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table { /* ::/0 */ .prefix = &in6addr_any, .label = 1, - },{ /* fc00::/7 */ - .prefix = &(struct in6_addr){{{ 0xfc }}}, + }, { /* fc00::/7 */ + .prefix = &(struct in6_addr){ { { 0xfc } } } , .prefixlen = 7, .label = 5, - },{ /* fec0::/10 */ - .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + }, { /* fec0::/10 */ + .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, .prefixlen = 10, .label = 11, - },{ /* 2002::/16 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, + }, { /* 2002::/16 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, .prefixlen = 16, .label = 2, - },{ /* 3ffe::/16 */ - .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + }, { /* 3ffe::/16 */ + .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, .prefixlen = 16, .label = 12, - },{ /* 2001::/32 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, + }, { /* 2001::/32 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, .prefixlen = 32, .label = 6, - },{ /* 2001:10::/28 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, + }, { /* 2001:10::/28 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, .prefixlen = 28, .label = 7, - },{ /* ::ffff:0:0 */ - .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, + }, { /* ::ffff:0:0 */ + .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, .prefixlen = 96, .label = 4, - },{ /* ::/96 */ + }, { /* ::/96 */ .prefix = &in6addr_any, .prefixlen = 96, .label = 3, - },{ /* ::1/128 */ + }, { /* ::1/128 */ .prefix = &in6addr_loopback, .prefixlen = 128, .label = 0, @@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) if (label == IPV6_ADDR_LABEL_DEFAULT) return -EINVAL; - switch(nlh->nlmsg_type) { + switch (nlh->nlmsg_type) { case RTM_NEWADDRLABEL: if (ifal->ifal_index && !__dev_get_by_index(net, ifal->ifal_index)) @@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { - if ((err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWADDRLABEL, - NLM_F_MULTI)) <= 0) + err = ip6addrlbl_fill(skb, p, + ip6addrlbl_table.seq, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWADDRLABEL, + NLM_F_MULTI); + if (err <= 0) break; } idx++; @@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) goto out; } - if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { + skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); + if (!skb) { ip6addrlbl_put(p); return -ENOBUFS; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 81e496a2e00..72a4930bdc0 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) struct ip_auth_hdr *ah; struct ah_data *ahp; struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; ahp = x->data; ahash = ahp->ahash; @@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } err = -ENOMEM; - iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen); + iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN + + extlen + seqhi_len); if (!iph_base) goto out; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(ahash, iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; ah = ip_auth_hdr(skb); memset(ah->auth_data, 0, ahp->icv_trunc_len); @@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); - ahash_request_set_crypt(req, sg, icv, skb->len); + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(seqhisg, seqhi, seqhi_len); + } + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_output_done, skb); AH_SKB_CB(skb)->tmp = iph_base; @@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) int nexthdr; int nfrags; int err = -ENOMEM; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, hdr_len); - work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } + + work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len + + ahp->icv_trunc_len + seqhi_len); if (!work_iph) goto out; - auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; memcpy(work_iph, ip6h, hdr_len); memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); @@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); + + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(seqhisg, seqhi, seqhi_len); + } - ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_input_done, skb); AH_SKB_CB(skb)->tmp = work_iph; @@ -609,8 +643,8 @@ out: return err; } -static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; @@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static int ah6_init_state(struct xfrm_state *x) @@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp); } +static int ah6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ah6_type = { .description = "AH6", @@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ah6_protocol = { +static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ah6_init(void) @@ -740,7 +782,7 @@ static int __init ah6_init(void) return -EAGAIN; } - if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { + if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah6_type, AF_INET6); return -EAGAIN; @@ -751,7 +793,7 @@ static int __init ah6_init(void) static void __exit ah6_fini(void) { - if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) + if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6eef8a7e35f..d15da137714 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) net_adj) & ~(blksize - 1)) + net_adj - 2; } -static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; @@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static void esp6_destroy(struct xfrm_state *x) @@ -614,6 +616,11 @@ error: return err; } +static int esp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type esp6_type = { .description = "ESP6", @@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol esp6_protocol = { - .handler = xfrm6_rcv, +static struct xfrm6_protocol esp6_protocol = { + .handler = xfrm6_rcv, + .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init esp6_init(void) @@ -640,7 +648,7 @@ static int __init esp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { + if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); return -EAGAIN; @@ -651,7 +659,7 @@ static int __init esp6_init(void) static void __exit esp6_fini(void) { - if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) + if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 140748debc4..8af3eb57f43 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd0..447a7fbd1bb 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f81f59686f2..7b326529e6a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -414,7 +414,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) addr_type = ipv6_addr_type(&hdr->daddr); if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || - ipv6_anycast_destination(skb)) + ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) saddr = &hdr->daddr; /* @@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) np->tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c9138189415..d4ade34ab37 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { - struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 72d198b8e4d..ee7a97f510c 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -79,7 +79,9 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) /* RFC 2460 section 8.1 says that we SHOULD log this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(uh->source), + &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); return 1; } if (skb->ip_summed == CHECKSUM_COMPLETE && diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 075602fc6b6..34e0ded5c14 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -9,14 +9,12 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * Yuji SEKIYA @USAGI: Support default route on router node; - * remove ip6_null_entry from the top of - * routing table. - * Ville Nuorvala: Fixed routing subtrees. + * + * Changes: + * Yuji SEKIYA @USAGI: Support default route on router node; + * remove ip6_null_entry from the top of + * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -46,10 +44,9 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -static struct kmem_cache * fib6_node_kmem __read_mostly; +static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t -{ +enum fib_walk_state_t { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif @@ -59,8 +56,7 @@ enum fib_walk_state_t FWS_U }; -struct fib6_cleaner_t -{ +struct fib6_cleaner_t { struct fib6_walker_t w; struct net *net; int (*func)(struct rt6_info *, void *arg); @@ -138,7 +134,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) const __be32 *addr = token; /* * Here, - * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) * is optimized version of * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. @@ -147,7 +143,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static __inline__ struct fib6_node * node_alloc(void) +static __inline__ struct fib6_node *node_alloc(void) { struct fib6_node *fn; @@ -156,7 +152,7 @@ static __inline__ struct fib6_node * node_alloc(void) return fn; } -static __inline__ void node_free(struct fib6_node * fn) +static __inline__ void node_free(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } @@ -292,7 +288,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) static void fib6_dump_end(struct netlink_callback *cb) { - struct fib6_walker_t *w = (void*)cb->args[2]; + struct fib6_walker_t *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { @@ -302,7 +298,7 @@ static void fib6_dump_end(struct netlink_callback *cb) cb->args[2] = 0; kfree(w); } - cb->done = (void*)cb->args[3]; + cb->done = (void *)cb->args[3]; cb->args[1] = 3; } @@ -485,7 +481,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right: fn->left; + fn = dir ? fn->right : fn->left; } while (fn); if (!allow_create) { @@ -638,12 +634,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } +static int fib6_commit_metrics(struct dst_entry *dst, + struct nlattr *mx, int mx_len) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (dst->flags & DST_HOST) { + mp = dst_metrics_write_ptr(dst); + } else { + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!mp) + return -ENOMEM; + dst_init_metrics(dst, mp, 0); + } + + nla_for_each_attr(nla, mx, mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (type > RTAX_MAX) + return -EINVAL; + + mp[type - 1] = nla_get_u32(nla); + } + } + return 0; +} + /* * Insert routing information in a node. */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info) + struct nl_info *info, struct nlattr *mx, int mx_len) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -653,6 +678,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + int err; ins = &fn->leaf; @@ -751,6 +777,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -770,6 +801,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -806,7 +842,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -900,7 +937,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } #endif - err = fib6_add_rt2node(fn, rt, info); + err = fib6_add_rt2node(fn, rt, info, mx, mx_len); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) @@ -955,8 +992,8 @@ struct lookup_args { const struct in6_addr *addr; /* search key */ }; -static struct fib6_node * fib6_lookup_1(struct fib6_node *root, - struct lookup_args *args) +static struct fib6_node *fib6_lookup_1(struct fib6_node *root, + struct lookup_args *args) { struct fib6_node *fn; __be32 dir; @@ -1018,8 +1055,8 @@ backtrack: return NULL; } -struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, - const struct in6_addr *saddr) +struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { @@ -1051,9 +1088,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da */ -static struct fib6_node * fib6_locate_1(struct fib6_node *root, - const struct in6_addr *addr, - int plen, int offset) +static struct fib6_node *fib6_locate_1(struct fib6_node *root, + const struct in6_addr *addr, + int plen, int offset) { struct fib6_node *fn; @@ -1081,9 +1118,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root, return NULL; } -struct fib6_node * fib6_locate(struct fib6_node *root, - const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len) +struct fib6_node *fib6_locate(struct fib6_node *root, + const struct in6_addr *daddr, int dst_len, + const struct in6_addr *saddr, int src_len) { struct fib6_node *fn; @@ -1151,8 +1188,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, children = 0; child = NULL; - if (fn->right) child = fn->right, children |= 1; - if (fn->left) child = fn->left, children |= 2; + if (fn->right) + child = fn->right, children |= 1; + if (fn->left) + child = fn->left, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES @@ -1180,8 +1219,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) pn->right = child; - else if (pn->left == fn) pn->left = child; + if (pn->right == fn) + pn->right = child; + else if (pn->left == fn) + pn->left = child; #if RT6_DEBUG >= 2 else WARN_ON(1); @@ -1213,10 +1254,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, w->node = child; if (children&2) { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_C ? FWS_U : FWS_INIT; } } } @@ -1314,7 +1355,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->dst.obsolete>0) { + if (rt->dst.obsolete > 0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1707,7 +1748,7 @@ out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: return -ENOMEM; - } +} static void fib6_net_exit(struct net *net) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index dfa41bb4e0d..0961b5ef866 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -15,9 +15,7 @@ #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> -#include <linux/if_arp.h> #include <linux/in6.h> -#include <linux/route.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -28,12 +26,8 @@ #include <net/sock.h> #include <net/ipv6.h> -#include <net/ndisc.h> -#include <net/protocol.h> -#include <net/ip6_route.h> #include <net/addrconf.h> #include <net/rawv6.h> -#include <net/icmp.h> #include <net/transp_v6.h> #include <asm/uaccess.h> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f3ffb43f59c..9d921462b57 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1454,7 +1454,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1464,16 +1463,10 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tap_stats; - ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tap_stats->syncp); - } - return 0; } @@ -1566,6 +1559,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6gre_get_size(const struct net_device *dev) { return @@ -1643,6 +1645,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .validate = ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, }; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 1e8683b135b..59f95affceb 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; - bool tunnel; + bool encap, udpfrag; int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & @@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = SKB_GSO_CB(skb)->encap_level > 0; - if (tunnel) + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) features = skb->dev->hw_enc_features & netif_skb_features(skb); SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); @@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); @@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); - if (tunnel) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } skb->network_header = (u8 *)ipv6h - skb->head; - if (!tunnel && proto == IPPROTO_UDP) { + if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); @@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); } + if (encap) + skb_reset_inner_headers(skb); } out: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ef02b26ccf8..40e7581374f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb) return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu || skb->local_df) + return false; + + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -353,6 +367,9 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -362,9 +379,6 @@ int ip6_forward(struct sk_buff *skb) goto drop; } - if (skb->pkt_type != PACKET_HOST) - goto drop; - skb_forward_csum(skb); /* @@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || - (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -517,9 +530,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } @@ -1091,21 +1101,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1122,7 +1130,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1204,6 +1212,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1221,8 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? - mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + if (ip6_sk_local_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; /* dontfrag active */ if ((cork->length + length > mtu - headersize) && dontfrag && @@ -1301,8 +1312,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc >= - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; @@ -1530,8 +1540,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip6_sk_local_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); @@ -1558,8 +1567,8 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5db8d310f9c..b05b609f69d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -108,12 +108,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); + start = u64_stats_fetch_begin_irq(&tstats->syncp); tmp.rx_packets = tstats->rx_packets; tmp.rx_bytes = tstats->rx_bytes; tmp.tx_packets = tstats->tx_packets; tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); sum.rx_packets += tmp.rx_packets; sum.rx_bytes += tmp.rx_bytes; @@ -1340,8 +1340,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip6_tnl_parm p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t = NULL; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); switch (cmd) { @@ -1353,11 +1353,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) - t = netdev_priv(dev); ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; @@ -1502,19 +1502,12 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6_tnl_stats; - ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6_tnl_stats->syncp); - } return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d19272b8ce..b7c0f827140 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -278,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - ip6_tnl_dst_reset(t); dev_put(dev); } @@ -288,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_sw_netstats *tstats; - if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -309,27 +305,58 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - tstats = this_cpu_ptr(t->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - skb->mark = 0; - secpath_reset(skb); - skb->dev = t->dev; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); - return 0; + + return xfrm6_rcv(skb); } rcu_read_unlock(); - return 1; - + return -EINVAL; discard: kfree_skb(skb); return 0; } +static int vti6_rcv_cb(struct sk_buff *skb, int err) +{ + unsigned short family; + struct net_device *dev; + struct pcpu_sw_netstats *tstats; + struct xfrm_state *x; + struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + + if (!t) + return 1; + + dev = t->dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; + } + + x = xfrm_input_state(skb); + family = x->inner_mode->afinfo->family; + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + return -EPERM; + + skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); + skb->dev = dev; + + tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; +} + /** * vti6_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device @@ -349,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } +static bool vti6_state_check(const struct xfrm_state *x, + const struct in6_addr *dst, + const struct in6_addr *src) +{ + xfrm_address_t *daddr = (xfrm_address_t *)dst; + xfrm_address_t *saddr = (xfrm_address_t *)src; + + /* if there is no transform then this tunnel is not functional. + * Or if the xfrm is not mode tunnel. + */ + if (!x || x->props.mode != XFRM_MODE_TUNNEL || + x->props.family != AF_INET6) + return false; + + if (ipv6_addr_any(dst)) + return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6); + + if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6)) + return false; + + return true; +} + /** * vti6_xmit - send a packet * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device + * @fl: the flow informations for the xfrm_lookup **/ -static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) +static int +vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { - struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct dst_entry *dst = NULL, *ndst = NULL; - struct flowi6 fl6; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; int err = -1; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) - return err; - - dst = ip6_tnl_dst_check(t); - if (!dst) { - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - ndst = ip6_route_output(net, NULL, &fl6); + if (!dst) + goto tx_err_link_failure; - if (ndst->error) - goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; - goto tx_err_link_failure; - } - dst = ndst; + dst_hold(dst); + dst = xfrm_lookup(t->net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; } - if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL) + if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -398,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; - skb_dst_drop(skb); - skb_dst_set_noref(skb, dst); + err = dst_output(skb); + if (net_xmit_eval(err) == 0) { + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); - ip6tunnel_xmit(skb, dev); - if (ndst) { - dev->mtu = dst_mtu(ndst); - ip6_tnl_dst_store(t, ndst); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += skb->len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; } return 0; @@ -413,7 +459,7 @@ tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -422,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + struct ipv6hdr *ipv6h; + struct flowi fl; int ret; + memset(&fl, 0, sizeof(fl)); + skb->mark = be32_to_cpu(t->parms.o_key); + switch (skb->protocol) { case htons(ETH_P_IPV6): - ret = vti6_xmit(skb, dev); + ipv6h = ipv6_hdr(skb); + + if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + goto tx_err; + + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); break; default: goto tx_err; } + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; @@ -444,24 +507,66 @@ tx_err: return NETDEV_TX_OK; } +static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __be32 spi; + struct xfrm_state *x; + struct ip6_tnl *t; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah; + struct ip_comp_hdr *ipch; + struct net *net = dev_net(skb->dev); + const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; + int protocol = iph->nexthdr; + + t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); + if (!t) + return -1; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data + offset); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data + offset); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data + offset); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + if (type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) + return 0; + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET6); + if (!x) + return 0; + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, skb->dev->ifindex, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); + xfrm_state_put(x); + + return 0; +} + static void vti6_link_config(struct ip6_tnl *t) { - struct dst_entry *dst; struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; - struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); - /* Set up flowi template */ - fl6->saddr = p->laddr; - fl6->daddr = p->raddr; - fl6->flowi6_oif = p->link; - fl6->flowi6_mark = be32_to_cpu(p->i_key); - fl6->flowi6_proto = p->proto; - fl6->flowlabel = 0; - p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); @@ -472,28 +577,6 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags &= ~IFF_POINTOPOINT; dev->iflink = p->link; - - if (p->flags & IP6_TNL_F_CAP_XMIT) { - - dst = ip6_route_output(dev_net(dev), NULL, fl6); - if (dst->error) - return; - - dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6), - NULL, 0); - if (IS_ERR(dst)) - return; - - if (dst->dev) { - dev->hard_header_len = dst->dev->hard_header_len; - - dev->mtu = dst_mtu(dst); - - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; - } - dst_release(dst); - } } /** @@ -720,7 +803,6 @@ static void vti6_dev_setup(struct net_device *dev) t = netdev_priv(dev); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -731,18 +813,12 @@ static void vti6_dev_setup(struct net_device *dev) static inline int vti6_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *stats; - stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&stats->syncp); - } return 0; } @@ -914,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .fill_info = vti6_fill_info, }; -static struct xfrm_tunnel_notifier vti6_handler __read_mostly = { - .handler = vti6_rcv, - .priority = 1, -}; - static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) { int h; @@ -990,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = { .size = sizeof(struct vti6_net), }; +static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1003,11 +1095,33 @@ static int __init vti6_tunnel_init(void) if (err < 0) goto out_pernet; - err = xfrm6_mode_tunnel_input_register(&vti6_handler); + err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) { - pr_err("%s: can't register vti6\n", __func__); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + goto out; } + + err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); + if (err < 0) { + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + + err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); + if (err < 0) { + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1015,7 +1129,9 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: - xfrm6_mode_tunnel_input_deregister(&vti6_handler); + xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); out: unregister_pernet_device(&vti6_net_ops); out_pernet: @@ -1028,8 +1144,12 @@ out_pernet: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); - if (xfrm6_mode_tunnel_input_deregister(&vti6_handler)) - pr_info("%s: can't deregister vti6\n", __func__); + if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) + pr_info("%s: can't deregister protocol\n", __func__); unregister_pernet_device(&vti6_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0eb4038a4d6..8659067da28 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -700,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; @@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2476,7 +2478,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index da9becb42e8..d1c793cffcb 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); @@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) @@ -174,6 +176,11 @@ out: return err; } +static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", @@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ipcomp6_protocol = +static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ipcomp6_init(void) @@ -199,7 +207,7 @@ static int __init ipcomp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { + if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; @@ -209,7 +217,7 @@ static int __init ipcomp6_init(void) static void __exit ipcomp6_fini(void) { - if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) + if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0a00f449de5..edb58aff4ae 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) goto e_inval; np->pmtudisc = val; retv = 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e1e47350784..08b367c6b9c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); - } else - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); + ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); + } else { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + } rcu_read_unlock(); return; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 35750df744d..4bff1f297e3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6 packet transformations such as the source, destination address and source and destination ports. +config NFT_REJECT_IPV6 + depends on NF_TABLES_IPV6 + default NFT_REJECT + tristate + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d1b4928f34f..70d3dd66f2c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o +obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 710238f58aa..e080fbbbc0e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c new file mode 100644 index 00000000000..0bc19fa8782 --- /dev/null +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> + +void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); + +static struct nft_expr_type nft_reject_ipv6_type; +static const struct nft_expr_ops nft_reject_ipv6_ops = { + .type = &nft_reject_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv6_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "reject", + .ops = &nft_reject_ipv6_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv6_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv6_type); +} + +static void __exit nft_reject_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv6_type); +} + +module_init(nft_reject_ipv6_module_init); +module_exit(nft_reject_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 827f795209c..6313abd53c9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -6,24 +6,24 @@ #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> +#include <net/secure_seq.h> void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) { static atomic_t ipv6_fragmentation_id; + struct in6_addr addr; int old, new; #if IS_ENABLED(CONFIG_IPV6) - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } + struct inet_peer *peer; + struct net *net; + + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + if (peer) { + fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); + return; } #endif do { @@ -32,7 +32,10 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) if (!new) new = 1; } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); + + addr = rt->rt6i_dst.addr; + addr.s6_addr32[0] ^= (__force __be32)new; + fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fb9beb78f00..bda74291c3e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -181,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, - ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11dac21e658..4011617cca6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -149,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) unsigned long prev, new; p = peer->metrics; - if (inet_metrics_new(peer)) + if (inet_metrics_new(peer) || + (old & DST_METRICS_FORCE_OVERWRITE)) memcpy(p, old_p, sizeof(u32) * RTAX_MAX); new = (unsigned long) p; @@ -289,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -373,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static bool rt6_need_strict(const struct in6_addr *daddr) -{ - return ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} - /* Multipath route selection: * Hash based function using packet header and flowlabel. * Adapted from fib_info_hashfn() @@ -857,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup); be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info); + err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); write_unlock_bh(&table->tb6_lock); return err; @@ -875,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - return __ip6_ins_rt(rt, &info); + return __ip6_ins_rt(rt, &info, NULL, 0); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1062,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1342,7 +1338,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1352,7 +1348,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; @@ -1513,7 +1510,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; @@ -1543,17 +1540,11 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; - - if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { - u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!metrics) { - err = -ENOMEM; - goto out; - } - dst_init_metrics(&rt->dst, metrics, 0); + if (rt->rt6i_dst.plen == 128) { + rt->dst.flags |= DST_HOST; + dst_metrics_set_force_overwrite(&rt->dst); } + #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1586,7 +1577,7 @@ int ip6_route_add(struct fib6_config *cfg) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard; + rt->dst.output = dst_discard_sk; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -1672,31 +1663,13 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_flags = cfg->fc_flags; install_route: - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - - if (type) { - if (type > RTAX_MAX) { - err = -EINVAL; - goto out; - } - - dst_metric_set(&rt->dst, type, nla_get_u32(nla)); - } - } - } - rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); out: if (dev) @@ -2156,7 +2129,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2167,7 +2140,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3dfbcf1dcb1..e5a453ca302 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } + ip_tunnel_dst_reset_all(tunnel); dev_put(dev); } @@ -973,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto out; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, - ttl, df, !net_eq(tunnel->net, dev_net(dev))); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; @@ -1082,6 +1084,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); } @@ -1112,6 +1115,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); return 0; } @@ -1123,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip_tunnel_parm p; struct ip_tunnel_prl prl; - struct ip_tunnel *t; - struct net *net = dev_net(dev); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; @@ -1135,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: #endif - t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); err = -EFAULT; if (cmd == SIOCGETTUNNEL) { @@ -1240,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; if (dev == sitn->fb_tunnel_dev) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); break; @@ -1258,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; switch (cmd) { case SIOCDELPRL: @@ -1271,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); break; @@ -1287,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(ip6rd))) goto done; - t = netdev_priv(dev); - if (cmd != SIOCDEL6RD) { err = ipip6_tunnel_update_6rd(t, &ip6rd); if (err < 0) @@ -1326,6 +1322,9 @@ static const struct net_device_ops ipip6_netdev_ops = { static void ipip6_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1356,7 +1355,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1365,14 +1363,14 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_tunnel_stats; - ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_tunnel_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } return 0; @@ -1384,7 +1382,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1395,14 +1392,14 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_fb_stats; - ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_fb_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } dev_hold(dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 889079b2ea8..e289830ed6e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -39,7 +39,7 @@ #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> - +#include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> @@ -65,8 +65,6 @@ #include <net/tcp_memcontrol.h> #include <net/busy_poll.h> -#include <asm/uaccess.h> - #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -501,8 +499,10 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) int res; res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); - if (!res) + if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } return res; } @@ -530,8 +530,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } -static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, - int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, + int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -715,7 +715,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -726,7 +726,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, + u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) { @@ -798,8 +798,10 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = inet6_iif(skb); + else + fl6.flowi6_oif = oif; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -833,6 +835,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif + int oif; if (th->rst) return; @@ -876,7 +879,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0); + oif = sk ? sk->sk_bound_dev_if : 0; + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -888,11 +892,11 @@ release_sk1: } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass, + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); } @@ -904,7 +908,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, (tw->tw_flowlabel << 12)); inet_twsk_put(tw); @@ -914,7 +918,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -1259,7 +1263,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); + if (key != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1303,9 +1308,8 @@ static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0)); - if (skb->len <= 76) { + if (skb->len <= 76) return __skb_checksum_complete(skb); - } return 0; } @@ -1335,7 +1339,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return tcp_v4_do_rcv(sk, skb); #ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash (sk, skb)) + if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard; #endif @@ -1602,7 +1606,8 @@ do_time_wait: break; case TCP_TW_RST: goto no_tcp_socket; - case TCP_TW_SUCCESS:; + case TCP_TW_SUCCESS: + ; } goto discard_it; } @@ -1647,7 +1652,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, + .twsk_destructor = tcp_twsk_destructor, }; static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1681,7 +1686,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* * TCP over IPv4 via INET6 API */ - static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7359f9eaa8..b261ee8b83f 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index cb04f7a16b5..901ef6f8add 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -18,65 +18,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -/* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; -static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex); - -int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -EEXIST; - int priority = handler->priority; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t->priority > priority) - break; - if (t->priority == priority) - goto err; - - } - - handler->next = *pprev; - rcu_assign_pointer(*pprev, handler); - - ret = 0; - -err: - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register); - -int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -ENOENT; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t == handler) { - *pprev = handler->next; - ret = 0; - break; - } - } - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister); - static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); @@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) @@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - for_each_input_rcu(rcv_notify_handlers, handler) - handler->handler(skb); - err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..19ef329bdbf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb) return x->outer_mode->afinfo->output_finish(skb); } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, __xfrm6_output); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f8e128c512..2a0bbda2c76 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -389,11 +389,17 @@ int __init xfrm6_init(void) if (ret) goto out_policy; + ret = xfrm6_protocol_init(); + if (ret) + goto out_state; + #ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; +out_state: + xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; @@ -404,6 +410,7 @@ void xfrm6_fini(void) #ifdef CONFIG_SYSCTL unregister_pernet_subsys(&xfrm6_net_ops); #endif + xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); dst_entries_destroy(&xfrm6_dst_ops); diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c new file mode 100644 index 00000000000..6ab989c486f --- /dev/null +++ b/net/ipv6/xfrm6_protocol.c @@ -0,0 +1,270 @@ +/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv4/xfrm4_protocol.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/xfrm.h> + +static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_protocol_mutex); + +static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_handlers; + case IPPROTO_AH: + return &ah6_handlers; + case IPPROTO_COMP: + return &ipcomp6_handlers; + } + + return NULL; +} + +#define for_each_protocol_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) \ + +int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +{ + int ret; + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(*proto_handlers(protocol), handler) + if ((ret = handler->cb_handler(skb, err)) <= 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_cb); + +static int xfrm6_esp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(esp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(esp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ah_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ah6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ah6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ipcomp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static const struct inet6_protocol esp6_protocol = { + .handler = xfrm6_esp_rcv, + .err_handler = xfrm6_esp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ah6_protocol = { + .handler = xfrm6_ah_rcv, + .err_handler = xfrm6_ah_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ipcomp6_protocol = { + .handler = xfrm6_ipcomp_rcv, + .err_handler = xfrm6_ipcomp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static struct xfrm_input_afinfo xfrm6_input_afinfo = { + .family = AF_INET6, + .owner = THIS_MODULE, + .callback = xfrm6_rcv_cb, +}; + +static inline const struct inet6_protocol *netproto(unsigned char protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_protocol; + case IPPROTO_AH: + return &ah6_protocol; + case IPPROTO_COMP: + return &ipcomp6_protocol; + } + + return NULL; +} + +int xfrm6_protocol_register(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + bool add_netproto = false; + + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm6_protocol_mutex); + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) + add_netproto = true; + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t->priority < priority) + break; + if (t->priority == priority) + goto err; + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm6_protocol_mutex); + + if (add_netproto) { + if (inet6_add_protocol(netproto(protocol), protocol)) { + pr_err("%s: can't add protocol\n", __func__); + ret = -EAGAIN; + } + } + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_register); + +int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + int ret = -ENOENT; + + mutex_lock(&xfrm6_protocol_mutex); + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) { + if (inet6_del_protocol(netproto(protocol), protocol) < 0) { + pr_err("%s: can't remove protocol\n", __func__); + ret = -EAGAIN; + } + } + + mutex_unlock(&xfrm6_protocol_mutex); + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_deregister); + +int __init xfrm6_protocol_init(void) +{ + return xfrm_input_register_afinfo(&xfrm6_input_afinfo); +} + +void xfrm6_protocol_fini(void) +{ + xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); +} diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 994e28bfb32..41e4e93cb3a 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -52,18 +52,12 @@ #include <net/p8022.h> #include <net/psnap.h> #include <net/sock.h> +#include <net/datalink.h> #include <net/tcp_states.h> +#include <net/net_namespace.h> #include <asm/uaccess.h> -#ifdef CONFIG_SYSCTL -extern void ipx_register_sysctl(void); -extern void ipx_unregister_sysctl(void); -#else -#define ipx_register_sysctl() -#define ipx_unregister_sysctl() -#endif - /* Configuration Variables */ static unsigned char ipxcfg_max_hops = 16; static char ipxcfg_auto_select_primary; @@ -84,15 +78,6 @@ DEFINE_SPINLOCK(ipx_interfaces_lock); struct ipx_interface *ipx_primary_net; struct ipx_interface *ipx_internal_net; -extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, - unsigned char *node); -extern void ipxrtr_del_routes(struct ipx_interface *intrfc); -extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, - struct iovec *iov, size_t len, int noblock); -extern int ipxrtr_route_skb(struct sk_buff *skb); -extern struct ipx_route *ipxrtr_lookup(__be32 net); -extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); - struct ipx_interface *ipx_interfaces_head(void) { struct ipx_interface *rc = NULL; @@ -1383,6 +1368,7 @@ static int ipx_release(struct socket *sock) goto out; lock_sock(sk); + sk->sk_shutdown = SHUTDOWN_MASK; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); @@ -1806,8 +1792,11 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); - if (!skb) + if (!skb) { + if (rc == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) + rc = 0; goto out; + } ipx = ipx_hdr(skb); copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr); @@ -1937,6 +1926,26 @@ static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long } #endif +static int ipx_shutdown(struct socket *sock, int mode) +{ + struct sock *sk = sock->sk; + + if (mode < SHUT_RD || mode > SHUT_RDWR) + return -EINVAL; + /* This maps: + * SHUT_RD (0) -> RCV_SHUTDOWN (1) + * SHUT_WR (1) -> SEND_SHUTDOWN (2) + * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) + */ + ++mode; + + lock_sock(sk); + sk->sk_shutdown |= mode; + release_sock(sk); + sk->sk_state_change(sk); + + return 0; +} /* * Socket family declarations @@ -1963,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = { .compat_ioctl = ipx_compat_ioctl, #endif .listen = sock_no_listen, - .shutdown = sock_no_shutdown, /* FIXME: support shutdown */ + .shutdown = ipx_shutdown, .setsockopt = ipx_setsockopt, .getsockopt = ipx_getsockopt, .sendmsg = ipx_sendmsg, @@ -1986,9 +1995,6 @@ static struct notifier_block ipx_dev_notifier = { .notifier_call = ipxitf_device_event, }; -extern struct datalink_proto *make_EII_client(void); -extern void destroy_EII_client(struct datalink_proto *); - static const unsigned char ipx_8022_type = 0xE0; static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; static const char ipx_EII_err_msg[] __initconst = diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index 30f4519b092..c1f03185c5e 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -20,15 +20,11 @@ DEFINE_RWLOCK(ipx_routes_lock); extern struct ipx_interface *ipx_internal_net; -extern __be16 ipx_cksum(struct ipxhdr *packet, int length); extern struct ipx_interface *ipxitf_find_using_net(__be32 net); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); -extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, - char *node); -extern struct ipx_interface *ipxitf_find_using_net(__be32 net); struct ipx_route *ipxrtr_lookup(__be32 net) { diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c4b7218058b..01e77b0ae07 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1382,6 +1382,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_type == SOCK_STREAM) { if (copied < rlen) { IUCV_SKB_CB(skb)->offset = offset + copied; + skb_queue_head(&sk->sk_receive_queue, skb); goto done; } } @@ -1756,7 +1757,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Wake up accept */ nsk->sk_state = IUCV_CONNECTED; - sk->sk_data_ready(sk, 1); + sk->sk_data_ready(sk); err = 0; fail: bh_unlock_sock(sk); @@ -1967,7 +1968,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) if (!err) { iucv_accept_enqueue(sk, nsk); nsk->sk_state = IUCV_CONNECTED; - sk->sk_data_ready(sk, 1); + sk->sk_data_ready(sk); } else iucv_sock_kill(nsk); bh_unlock_sock(sk); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index cd5b8ec9be0..da787930df0 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -621,6 +621,42 @@ static void iucv_disable(void) put_online_cpus(); } +static void free_iucv_data(int cpu) +{ + kfree(iucv_param_irq[cpu]); + iucv_param_irq[cpu] = NULL; + kfree(iucv_param[cpu]); + iucv_param[cpu] = NULL; + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; +} + +static int alloc_iucv_data(int cpu) +{ + /* Note: GFP_DMA used to get memory below 2G */ + iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_irq_data[cpu]) + goto out_free; + + /* Allocate parameter blocks. */ + iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param[cpu]) + goto out_free; + + iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param_irq[cpu]) + goto out_free; + + return 0; + +out_free: + free_iucv_data(cpu); + return -ENOMEM; +} + static int iucv_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -630,38 +666,14 @@ static int iucv_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_irq_data[cpu]) - return notifier_from_errno(-ENOMEM); - - iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param[cpu]) { - kfree(iucv_irq_data[cpu]); - iucv_irq_data[cpu] = NULL; + if (alloc_iucv_data(cpu)) return notifier_from_errno(-ENOMEM); - } - iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param_irq[cpu]) { - kfree(iucv_param[cpu]); - iucv_param[cpu] = NULL; - kfree(iucv_irq_data[cpu]); - iucv_irq_data[cpu] = NULL; - return notifier_from_errno(-ENOMEM); - } break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - kfree(iucv_param_irq[cpu]); - iucv_param_irq[cpu] = NULL; - kfree(iucv_param[cpu]); - iucv_param[cpu] = NULL; - kfree(iucv_irq_data[cpu]); - iucv_irq_data[cpu] = NULL; + free_iucv_data(cpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -2016,7 +2028,7 @@ static int __init iucv_init(void) rc = iucv_query_maxconn(); if (rc) goto out_ctl; - rc = register_external_interrupt(0x4000, iucv_external_interrupt); + rc = register_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); if (rc) goto out_ctl; iucv_root = root_device_register("iucv"); @@ -2025,33 +2037,20 @@ static int __init iucv_init(void) goto out_int; } - for_each_online_cpu(cpu) { - /* Note: GFP_DMA used to get memory below 2G */ - iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_irq_data[cpu]) { - rc = -ENOMEM; - goto out_free; - } + cpu_notifier_register_begin(); - /* Allocate parameter blocks. */ - iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param[cpu]) { - rc = -ENOMEM; - goto out_free; - } - iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), - GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param_irq[cpu]) { + for_each_online_cpu(cpu) { + if (alloc_iucv_data(cpu)) { rc = -ENOMEM; goto out_free; } - } - rc = register_hotcpu_notifier(&iucv_cpu_notifier); + rc = __register_hotcpu_notifier(&iucv_cpu_notifier); if (rc) goto out_free; + + cpu_notifier_register_done(); + rc = register_reboot_notifier(&iucv_reboot_notifier); if (rc) goto out_cpu; @@ -2069,19 +2068,17 @@ static int __init iucv_init(void) out_reboot: unregister_reboot_notifier(&iucv_reboot_notifier); out_cpu: - unregister_hotcpu_notifier(&iucv_cpu_notifier); + cpu_notifier_register_begin(); + __unregister_hotcpu_notifier(&iucv_cpu_notifier); out_free: - for_each_possible_cpu(cpu) { - kfree(iucv_param_irq[cpu]); - iucv_param_irq[cpu] = NULL; - kfree(iucv_param[cpu]); - iucv_param[cpu] = NULL; - kfree(iucv_irq_data[cpu]); - iucv_irq_data[cpu] = NULL; - } + for_each_possible_cpu(cpu) + free_iucv_data(cpu); + + cpu_notifier_register_done(); + root_device_unregister(iucv_root); out_int: - unregister_external_interrupt(0x4000, iucv_external_interrupt); + unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); out_ctl: ctl_clear_bit(0, 1); out: @@ -2105,18 +2102,14 @@ static void __exit iucv_exit(void) kfree(p); spin_unlock_irq(&iucv_queue_lock); unregister_reboot_notifier(&iucv_reboot_notifier); - unregister_hotcpu_notifier(&iucv_cpu_notifier); - for_each_possible_cpu(cpu) { - kfree(iucv_param_irq[cpu]); - iucv_param_irq[cpu] = NULL; - kfree(iucv_param[cpu]); - iucv_param[cpu] = NULL; - kfree(iucv_irq_data[cpu]); - iucv_irq_data[cpu] = NULL; - } + cpu_notifier_register_begin(); + __unregister_hotcpu_notifier(&iucv_cpu_notifier); + for_each_possible_cpu(cpu) + free_iucv_data(cpu); + cpu_notifier_register_done(); root_device_unregister(iucv_root); bus_unregister(&iucv_bus); - unregister_external_interrupt(0x4000, iucv_external_interrupt); + unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); } subsys_initcall(iucv_init); diff --git a/net/key/af_key.c b/net/key/af_key.c index 1a04c132936..f3c83073afc 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -205,7 +205,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { skb_set_owner_r(*skb2, sk); skb_queue_tail(&sk->sk_receive_queue, *skb2); - sk->sk_data_ready(sk, (*skb2)->len); + sk->sk_data_ready(sk); *skb2 = NULL; err = 0; } @@ -365,6 +365,7 @@ static const u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), + [SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -433,12 +434,13 @@ static inline int verify_sec_ctx_len(const void *p) return 0; } -static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx) +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, + gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; - uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); + uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; @@ -1124,7 +1126,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; @@ -1798,6 +1800,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk) static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; + struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); if (pfk->dump.dump != NULL) @@ -1807,11 +1810,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (proto == 0) return -EINVAL; + if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { + struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return -ENOMEM; + + memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, + sizeof(xfrm_address_t)); + memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr, + sizeof(xfrm_address_t)); + filter->family = xfilter->sadb_x_filter_family; + filter->splen = xfilter->sadb_x_filter_splen; + filter->dplen = xfilter->sadb_x_filter_dplen; + } + pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; - xfrm_state_walk_init(&pfk->dump.u.state, proto); + xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); return pfkey_do_dump(pfk); } @@ -2231,14 +2250,14 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; goto out; } - err = security_xfrm_policy_alloc(&xp->security, uctx); + err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) @@ -2335,12 +2354,12 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; - err = security_xfrm_policy_alloc(&pol_ctx, uctx); + err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; @@ -3059,6 +3078,24 @@ static u32 get_acqseq(void) return res; } +static bool pfkey_is_alive(const struct km_event *c) +{ + struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id); + struct sock *sk; + bool is_alive = false; + + rcu_read_lock(); + sk_for_each_rcu(sk, &net_pfkey->table) { + if (pfkey_sk(sk)->registered) { + is_alive = true; + break; + } + } + rcu_read_unlock(); + + return is_alive; +} + static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; @@ -3239,8 +3276,8 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, } if ((*dir = verify_sec_ctx_len(p))) goto out; - uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); - *dir = security_xfrm_policy_alloc(&xp->security, uctx); + uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); + *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) @@ -3784,6 +3821,7 @@ static struct xfrm_mgr pfkeyv2_mgr = .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, .migrate = pfkey_send_migrate, + .is_alive = pfkey_is_alive, }; static int __net_init pfkey_net_init(struct net *net) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735d0f60c83..a4e37d7158d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -112,7 +112,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) @@ -1131,11 +1130,11 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) - error = inet6_csk_xmit(skb, NULL); + if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) + error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif - error = ip_queue_xmit(skb, fl); + error = ip_queue_xmit(tunnel->sock, skb, fl); /* Update stats */ if (error >= 0) { @@ -1151,23 +1150,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, return 0; } -/* Automatically called when the skb is freed. - */ -static void l2tp_sock_wfree(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - -/* For data skbs that we transmit, we associate with the tunnel socket - * but don't do accounting. - */ -static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) -{ - sock_hold(sk); - skb->sk = sk; - skb->destructor = l2tp_sock_wfree; -} - #if IS_ENABLED(CONFIG_IPV6) static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, int udp_len) @@ -1221,7 +1203,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len return NET_XMIT_DROP; } - skb_orphan(skb); /* Setup L2TP header */ session->build_header(session, __skb_push(skb, hdr_len)); @@ -1287,8 +1268,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len break; } - l2tp_skb_set_owner_w(skb, sk); - l2tp_xmit_core(session, skb, fl, data_len); out_unlock: bh_unlock_sock(sk); @@ -1809,8 +1788,6 @@ void l2tp_session_free(struct l2tp_session *session) } kfree(session); - - return; } EXPORT_SYMBOL_GPL(l2tp_session_free); @@ -1863,7 +1840,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ -static void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1876,6 +1853,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio } } +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { @@ -2016,7 +1994,7 @@ static int __init l2tp_init(void) if (rc) goto out; - l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); rc = -ENOMEM; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1f01ba3435b..3f93ccd6ba9 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int length, int (*payload_hook)(struct sk_buff *skb)); int l2tp_session_queue_purge(struct l2tp_session *session); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); +void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0b44d855269..3397fe6897c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m xmit: /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, &inet->cork.fl); + rc = ip_queue_xmit(sk, skb, &inet->cork.fl); rcu_read_unlock(); error: diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4cfd722e915..bd7387adea9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_RECV_SEQ]) session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); - if (info->attrs[L2TP_ATTR_SEND_SEQ]) + if (info->attrs[L2TP_ATTR_SEND_SEQ]) { session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + l2tp_session_set_header_len(session, session->tunnel->version); + } if (info->attrs[L2TP_ATTR_LNS_MODE]) session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index be5fadf3473..950909f04ee 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { - l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n", - session->name); + l2tp_dbg(session, PPPOL2TP_MSG_DATA, + "%s: recv %d byte data frame, passing to L2TP socket\n", + session->name, data_len); - /* Not bound. Nothing we can do, so discard. */ - atomic_long_inc(&session->stats.rx_errors); - kfree_skb(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + } } return; @@ -454,13 +456,11 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (sock) { inet_shutdown(sock, 2); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } - return; } /* Really kill the session socket. (Called from sock_put() if @@ -474,7 +474,6 @@ static void pppol2tp_session_destruct(struct sock *sk) BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - return; } /* Called when the PPPoX socket (session) is closed. @@ -754,9 +753,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->deref = pppol2tp_session_sock_put; /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(sk); + dst = sk_dst_get(tunnel->sock); if (dst != NULL) { - u32 pmtu = dst_mtu(__sk_dst_get(sk)); + u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock)); if (pmtu != 0) session->mtu = session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; @@ -1312,6 +1311,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } + l2tp_session_set_header_len(session, session->tunnel->version); l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set send_seq=%d\n", session->name, session->send_seq); diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 9d7d840aac6..1e46ffa6916 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -25,7 +25,8 @@ mac80211-y := \ wme.o \ event.o \ chan.o \ - trace.o mlme.o + trace.o mlme.o \ + tdls.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7c7df475a40..ec24378caaa 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -23,12 +23,13 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; - memset(&aead_req, 0, sizeof(aead_req)); + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -36,23 +37,23 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0); + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0); - crypto_aead_encrypt(&aead_req.req); + crypto_aead_encrypt(aead_req); } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; - memset(&aead_req, 0, sizeof(aead_req)); + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -60,12 +61,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, ct, &pt, + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, data_len + IEEE80211_CCMP_MIC_LEN, b_0); - return crypto_aead_decrypt(&aead_req.req); + return crypto_aead_decrypt(aead_req); } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6973ccdd230..d7513a503be 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -109,6 +109,15 @@ static int ieee80211_change_iface(struct wiphy *wiphy, static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + return ieee80211_do_open(wdev, true); } @@ -463,8 +472,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = local->rate_ctrl; struct timespec uptime; u64 packets = 0; + u32 thr = 0; int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -578,6 +589,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, &sta->sta); + + if (thr != 0) { + sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->expected_throughput = thr; + } } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { @@ -768,7 +790,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *mac, struct station_info *sinfo) + int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -798,7 +820,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -972,13 +994,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; mutex_lock(&local->mtx); - sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); + if (!err) + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); if (err) return err; - ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -1021,8 +1043,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, IEEE80211_P2P_OPPPS_ENABLE_BIT; err = ieee80211_assign_beacon(sdata, ¶ms->beacon); - if (err < 0) + if (err < 0) { + ieee80211_vif_release_channel(sdata); return err; + } changed |= err; err = drv_start_ap(sdata->local, sdata); @@ -1032,6 +1056,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); + ieee80211_vif_release_channel(sdata); return err; } @@ -1072,6 +1097,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, return 0; } +bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + if (!sdata->vif.csa_active) + continue; + + if (!sdata->csa_block_tx) + continue; + + rcu_read_unlock(); + return true; + } + rcu_read_unlock(); + + return false; +} + static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1089,12 +1139,17 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); + kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; - cancel_work_sync(&sdata->u.ap.request_smps_work); - /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); @@ -1106,6 +1161,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); + sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF; __sta_info_flush(sdata, true); ieee80211_free_keys(sdata, true); @@ -1129,8 +1185,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); - ieee80211_vif_copy_chanctx_to_vlans(sdata, true); mutex_lock(&local->mtx); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); mutex_unlock(&local->mtx); @@ -1345,9 +1401,6 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->vht_capa, sta); if (params->opmode_notif_used) { - enum ieee80211_band band = - ieee80211_get_sdata_band(sdata); - /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ @@ -1417,7 +1470,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params) + const u8 *mac, + struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; @@ -1451,6 +1505,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } else { + sta->sta.tdls = true; } err = sta_apply_parameters(local, sta, params); @@ -1484,7 +1540,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct ieee80211_sub_if_data *sdata; @@ -1498,7 +1554,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1567,7 +1623,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) { - rcu_assign_pointer(sta->sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); prev_4addr = true; } @@ -1623,7 +1679,7 @@ out_err: #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop) + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1651,7 +1707,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1662,9 +1718,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_change_mpath(struct wiphy *wiphy, - struct net_device *dev, - u8 *dst, u8 *next_hop) +static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1756,8 +1811,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *dst, u8 *next_hop, - struct mpath_info *pinfo) + int idx, u8 *dst, u8 *next_hop, + struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -2665,6 +2720,24 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); INIT_LIST_HEAD(&roc->dependents); + /* + * cookie is either the roc cookie (for normal roc) + * or the SKB (for mgmt TX) + */ + if (!txskb) { + /* local->mtx protects this */ + local->roc_cookie_counter++; + roc->cookie = local->roc_cookie_counter; + /* wow, you wrapped 64 bits ... more likely a bug */ + if (WARN_ON(roc->cookie == 0)) { + roc->cookie = 1; + local->roc_cookie_counter++; + } + *cookie = roc->cookie; + } else { + *cookie = (unsigned long)txskb; + } + /* if there's one pending or we're scanning, queue this one */ if (!list_empty(&local->roc_list) || local->scanning || local->radar_detect_enabled) @@ -2787,24 +2860,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!queued) list_add_tail(&roc->list, &local->roc_list); - /* - * cookie is either the roc cookie (for normal roc) - * or the SKB (for mgmt TX) - */ - if (!txskb) { - /* local->mtx protects this */ - local->roc_cookie_counter++; - roc->cookie = local->roc_cookie_counter; - /* wow, you wrapped 64 bits ... more likely a bug */ - if (WARN_ON(roc->cookie == 0)) { - roc->cookie = 1; - local->roc_cookie_counter++; - } - *cookie = roc->cookie; - } else { - *cookie = (unsigned long)txskb; - } - return 0; } @@ -2915,11 +2970,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + u32 cac_time_ms) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - unsigned long timeout; int err; mutex_lock(&local->mtx); @@ -2931,16 +2986,15 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; - sdata->radar_required = true; err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); if (err) goto out_unlock; - timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); ieee80211_queue_delayed_work(&sdata->local->hw, - &sdata->dfs_cac_timer_work, timeout); + &sdata->dfs_cac_timer_work, + msecs_to_jiffies(cac_time_ms)); out_unlock: mutex_unlock(&local->mtx); @@ -3012,26 +3066,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_finish); -static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, + u32 *changed) { - struct ieee80211_local *local = sdata->local; - int err, changed = 0; - - sdata_assert_lock(sdata); - - mutex_lock(&local->mtx); - sdata->radar_required = sdata->csa_radar_required; - err = ieee80211_vif_change_channel(sdata, &changed); - mutex_unlock(&local->mtx); - if (WARN_ON(err < 0)) - return; - - if (!local->use_chanctx) { - local->_oper_chandef = sdata->csa_chandef; - ieee80211_hw_config(local, 0); - } + int err; - sdata->vif.csa_active = false; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); @@ -3039,35 +3078,74 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) sdata->u.ap.next_beacon = NULL; if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #endif default: WARN_ON(1); - return; + return -EINVAL; + } + + return 0; +} + +static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + + sdata->radar_required = sdata->csa_radar_required; + err = ieee80211_vif_change_channel(sdata, &changed); + if (err < 0) + return err; + + if (!local->use_chanctx) { + local->_oper_chandef = sdata->csa_chandef; + ieee80211_hw_config(local, 0); } + sdata->vif.csa_active = false; + + err = ieee80211_set_after_csa_beacon(sdata, &changed); + if (err) + return err; + ieee80211_bss_info_change_notify(sdata, changed); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); - ieee80211_wake_queues_by_reason(&sdata->local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + return 0; +} + +static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + if (__ieee80211_csa_finalize(sdata)) { + sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); + cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, + GFP_KERNEL); + } } void ieee80211_csa_finalize_work(struct work_struct *work) @@ -3075,8 +3153,11 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, csa_finalize_work); + struct ieee80211_local *local = sdata->local; sdata_lock(sdata); + mutex_lock(&local->mtx); + /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.csa_active) goto unlock; @@ -3087,55 +3168,15 @@ void ieee80211_csa_finalize_work(struct work_struct *work) ieee80211_csa_finalize(sdata); unlock: + mutex_unlock(&local->mtx); sdata_unlock(sdata); } -int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_csa_settings *params, + u32 *changed) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_chanctx *chanctx; - struct ieee80211_if_mesh __maybe_unused *ifmsh; - int err, num_chanctx, changed = 0; - - sdata_assert_lock(sdata); - - if (!list_empty(&local->roc_list) || local->scanning) - return -EBUSY; - - if (sdata->wdev.cac_started) - return -EBUSY; - - if (cfg80211_chandef_identical(¶ms->chandef, - &sdata->vif.bss_conf.chandef)) - return -EINVAL; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); - return -EBUSY; - } - - /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - rcu_read_unlock(); - return -EBUSY; - } - num_chanctx = 0; - list_for_each_entry_rcu(chanctx, &local->chanctx_list, list) - num_chanctx++; - rcu_read_unlock(); - - if (num_chanctx > 1) - return -EBUSY; - - /* don't allow another channel switch if one is already active. */ - if (sdata->vif.csa_active) - return -EBUSY; + int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -3163,15 +3204,31 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (params->count <= 1) break; - sdata->csa_counter_offset_beacon = - params->counter_offset_beacon; - sdata->csa_counter_offset_presp = params->counter_offset_presp; + if ((params->n_counter_offsets_beacon > + IEEE80211_MAX_CSA_COUNTERS_NUM) || + (params->n_counter_offsets_presp > + IEEE80211_MAX_CSA_COUNTERS_NUM)) + return -EINVAL; + + /* make sure we don't have garbage in other counters */ + memset(sdata->csa_counter_offset_beacon, 0, + sizeof(sdata->csa_counter_offset_beacon)); + memset(sdata->csa_counter_offset_presp, 0, + sizeof(sdata->csa_counter_offset_presp)); + + memcpy(sdata->csa_counter_offset_beacon, + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + memcpy(sdata->csa_counter_offset_presp, + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); if (err < 0) { kfree(sdata->u.ap.next_beacon); return err; } - changed |= err; + *changed |= err; break; case NL80211_IFTYPE_ADHOC: @@ -3205,15 +3262,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, err = ieee80211_ibss_csa_beacon(sdata, params); if (err < 0) return err; - changed |= err; + *changed |= err; } ieee80211_send_action_csa(sdata, params); break; #ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - ifmsh = &sdata->u.mesh; + case NL80211_IFTYPE_MESH_POINT: { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; if (params->chandef.width != sdata->vif.bss_conf.chandef.width) return -EINVAL; @@ -3238,28 +3295,86 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } - changed |= err; + *changed |= err; } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) ieee80211_send_action_csa(sdata, params); break; + } #endif default: return -EOPNOTSUPP; } - sdata->csa_radar_required = params->radar_required; + return 0; +} - if (params->block_tx) - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); +static int +__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *chanctx; + int err, num_chanctx, changed = 0; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + + if (!list_empty(&local->roc_list) || local->scanning) + return -EBUSY; + + if (sdata->wdev.cac_started) + return -EBUSY; + + if (cfg80211_chandef_identical(¶ms->chandef, + &sdata->vif.bss_conf.chandef)) + return -EINVAL; + + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + mutex_unlock(&local->chanctx_mtx); + return -EBUSY; + } + + /* don't handle for multi-VIF cases */ + chanctx = container_of(conf, struct ieee80211_chanctx, conf); + if (ieee80211_chanctx_refcount(local, chanctx) > 1) { + mutex_unlock(&local->chanctx_mtx); + return -EBUSY; + } + num_chanctx = 0; + list_for_each_entry_rcu(chanctx, &local->chanctx_list, list) + num_chanctx++; + mutex_unlock(&local->chanctx_mtx); + + if (num_chanctx > 1) + return -EBUSY; + + /* don't allow another channel switch if one is already active. */ + if (sdata->vif.csa_active) + return -EBUSY; + + err = ieee80211_set_csa_beacon(sdata, params, &changed); + if (err) + return err; + sdata->csa_radar_required = params->radar_required; sdata->csa_chandef = params->chandef; + sdata->csa_block_tx = params->block_tx; + sdata->csa_current_counter = params->count; sdata->vif.csa_active = true; + if (sdata->csa_block_tx) + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (changed) { ieee80211_bss_info_change_notify(sdata, changed); drv_channel_switch_beacon(sdata, ¶ms->chandef); @@ -3271,6 +3386,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return 0; } +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int err; + + mutex_lock(&local->mtx); + err = __ieee80211_channel_switch(wiphy, dev, params); + mutex_unlock(&local->mtx); + + return err; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -3283,6 +3412,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, bool need_offchan = false; u32 flags; int ret; + u8 *data; if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -3376,7 +3506,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } skb_reserve(skb, local->hw.extra_tx_headroom); - memcpy(skb_put(skb, params->len), params->buf, params->len); + data = skb_put(skb, params->len); + memcpy(data, params->buf, params->len); + + /* Update CSA counters */ + if (sdata->vif.csa_active && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC) && + params->n_csa_offsets) { + int i; + u8 c = sdata->csa_current_counter; + + for (i = 0; i < params->n_csa_offsets; i++) + data[params->csa_offsets[i]] = c; + } IEEE80211_SKB_CB(skb)->flags = flags; @@ -3485,320 +3628,6 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy, return 0; } -static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) -{ - u8 *pos = (void *)skb_put(skb, 7); - - *pos++ = WLAN_EID_EXT_CAPABILITY; - *pos++ = 5; /* len */ - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; -} - -static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - u16 capab; - - capab = 0; - if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) - return capab; - - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; - - return capab; -} - -static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, - u8 *peer, u8 *bssid) -{ - struct ieee80211_tdls_lnkie *lnkid; - - lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); - - lnkid->ie_type = WLAN_EID_LINK_ID; - lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; - - memcpy(lnkid->bssid, bssid, ETH_ALEN); - memcpy(lnkid->init_sta, src_addr, ETH_ALEN); - memcpy(lnkid->resp_sta, peer, ETH_ALEN); -} - -static int -ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_tdls_data *tf; - - tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); - - memcpy(tf->da, peer, ETH_ALEN); - memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); - tf->ether_type = cpu_to_be16(ETH_P_TDLS); - tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_REQUEST; - - skb_put(skb, sizeof(tf->u.setup_req)); - tf->u.setup_req.dialog_token = dialog_token; - tf->u.setup_req.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_RESPONSE: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_RESPONSE; - - skb_put(skb, sizeof(tf->u.setup_resp)); - tf->u.setup_resp.status_code = cpu_to_le16(status_code); - tf->u.setup_resp.dialog_token = dialog_token; - tf->u.setup_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_CONFIRM: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_CONFIRM; - - skb_put(skb, sizeof(tf->u.setup_cfm)); - tf->u.setup_cfm.status_code = cpu_to_le16(status_code); - tf->u.setup_cfm.dialog_token = dialog_token; - break; - case WLAN_TDLS_TEARDOWN: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_TEARDOWN; - - skb_put(skb, sizeof(tf->u.teardown)); - tf->u.teardown.reason_code = cpu_to_le16(status_code); - break; - case WLAN_TDLS_DISCOVERY_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; - - skb_put(skb, sizeof(tf->u.discover_req)); - tf->u.discover_req.dialog_token = dialog_token; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int -ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_mgmt *mgmt; - - mgmt = (void *)skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, peer, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); - - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ACTION); - - switch (action_code) { - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); - mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, const u8 *extra_ies, - size_t extra_ies_len) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = NULL; - bool send_direct; - int ret; - - if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; - - /* make sure we are in managed mode, and associated */ - if (sdata->vif.type != NL80211_IFTYPE_STATION || - !sdata->u.mgd.associated) - return -EINVAL; - - tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", - action_code, peer); - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - max(sizeof(struct ieee80211_mgmt), - sizeof(struct ieee80211_tdls_data)) + - 50 + /* supported rates */ - 7 + /* ext capab */ - extra_ies_len + - sizeof(struct ieee80211_tdls_lnkie)); - if (!skb) - return -ENOMEM; - - skb_reserve(skb, local->hw.extra_tx_headroom); - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, - action_code, dialog_token, - status_code, skb); - send_direct = false; - break; - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, - dialog_token, status_code, - skb); - send_direct = true; - break; - default: - ret = -ENOTSUPP; - break; - } - - if (ret < 0) - goto fail; - - if (extra_ies_len) - memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); - - /* the TDLS link IE is always added last */ - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - /* we are the initiator */ - ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, - sdata->u.mgd.bssid); - break; - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - /* we are the responder */ - ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, - sdata->u.mgd.bssid); - break; - default: - ret = -ENOTSUPP; - goto fail; - } - - if (send_direct) { - ieee80211_tx_skb(sdata, skb); - return 0; - } - - /* - * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise - * we should default to AC_VI. - */ - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; - break; - default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; - break; - } - - /* disable bottom halves when entering the Tx path */ - local_bh_disable(); - ret = ieee80211_subif_start_xmit(skb, dev); - local_bh_enable(); - - return ret; - -fail: - dev_kfree_skb(skb); - return ret; -} - -static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation oper) -{ - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; - - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EINVAL; - - tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); - - switch (oper) { - case NL80211_TDLS_ENABLE_LINK: - rcu_read_lock(); - sta = sta_info_get(sdata, peer); - if (!sta) { - rcu_read_unlock(); - return -ENOLINK; - } - - set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); - rcu_read_unlock(); - break; - case NL80211_TDLS_DISABLE_LINK: - return sta_info_destroy_addr(sdata, peer); - case NL80211_TDLS_TEARDOWN: - case NL80211_TDLS_SETUP: - case NL80211_TDLS_DISCOVERY_REQ: - /* We don't support in-driver setup/teardown/discovery */ - return -ENOTSUPP; - default: - return -ENOTSUPP; - } - - return 0; -} - static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { @@ -3937,6 +3766,21 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy, return 0; } +static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int ret; + u32 changed = 0; + + ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed); + if (ret == 0) + ieee80211_bss_info_change_notify(sdata, changed); + + return ret; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4017,4 +3861,5 @@ const struct cfg80211_ops mac80211_config_ops = { .start_radar_detection = ieee80211_start_radar_detection, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, + .set_ap_chanwidth = ieee80211_set_ap_chanwidth, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 42c659229a0..a310e33972d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,6 +9,170 @@ #include "ieee80211_i.h" #include "driver-ops.h" +static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list) + num++; + + return num; +} + +static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list) + num++; + + return num; +} + +int ieee80211_chanctx_refcount(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + return ieee80211_chanctx_num_assigned(local, ctx) + + ieee80211_chanctx_num_reserved(local, ctx); +} + +static int ieee80211_num_chanctx(struct ieee80211_local *local) +{ + struct ieee80211_chanctx *ctx; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) + num++; + + return num; +} + +static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local) +{ + lockdep_assert_held(&local->chanctx_mtx); + return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local); +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (!compat) + compat = &sdata->reserved_chandef; + + compat = cfg80211_chandef_compatible(&sdata->reserved_chandef, + compat); + if (!compat) + break; + } + + return compat; +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->assigned_vifs, + assigned_chanctx_list) { + if (sdata->reserved_chanctx != NULL) + continue; + + if (!compat) + compat = &sdata->vif.bss_conf.chandef; + + compat = cfg80211_chandef_compatible( + &sdata->vif.bss_conf.chandef, compat); + if (!compat) + break; + } + + return compat; +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_combined_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + lockdep_assert_held(&local->chanctx_mtx); + + compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat); + if (!compat) + return NULL; + + compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat); + if (!compat) + return NULL; + + return compat; +} + +static bool +ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *def) +{ + lockdep_assert_held(&local->chanctx_mtx); + + if (ieee80211_chanctx_combined_chandef(local, ctx, def)) + return true; + + if (!list_empty(&ctx->reserved_vifs) && + ieee80211_chanctx_reserved_chandef(local, ctx, def)) + return true; + + return false; +} + +static struct ieee80211_chanctx * +ieee80211_find_reservation_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) +{ + struct ieee80211_chanctx *ctx; + + lockdep_assert_held(&local->chanctx_mtx); + + if (mode == IEEE80211_CHANCTX_EXCLUSIVE) + return NULL; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) + continue; + + if (!ieee80211_chanctx_can_reserve_chandef(local, ctx, + chandef)) + continue; + + return ctx; + } + + return NULL; +} + static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta) { switch (sta->bandwidth) { @@ -100,6 +264,12 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, } max_bw = max(max_bw, width); } + + /* use the configured bandwidth in case of monitor interface */ + sdata = rcu_dereference(local->monitor_sdata); + if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf) + max_bw = max(max_bw, conf->def.width); + rcu_read_unlock(); return max_bw; @@ -184,6 +354,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; + compat = ieee80211_chanctx_reserved_chandef(local, ctx, + compat); + if (!compat) + continue; + ieee80211_change_chanctx(local, ctx, compat); return ctx; @@ -211,62 +386,91 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local) } static struct ieee80211_chanctx * -ieee80211_new_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, - enum ieee80211_chanctx_mode mode) +ieee80211_alloc_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; - u32 changed; - int err; lockdep_assert_held(&local->chanctx_mtx); ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL); if (!ctx) - return ERR_PTR(-ENOMEM); + return NULL; + INIT_LIST_HEAD(&ctx->assigned_vifs); + INIT_LIST_HEAD(&ctx->reserved_vifs); ctx->conf.def = *chandef; ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = ieee80211_is_radar_required(local); ieee80211_recalc_chanctx_min_def(local, ctx); + + return ctx; +} + +static int ieee80211_add_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + u32 changed; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; - /* we hold the mutex to prevent idle from changing */ - lockdep_assert_held(&local->mtx); /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) ieee80211_hw_config(local, changed); if (!local->use_chanctx) { - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); + local->_oper_chandef = ctx->conf.def; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { err = drv_add_chanctx(local, ctx); if (err) { - kfree(ctx); ieee80211_recalc_idle(local); - return ERR_PTR(err); + return err; } } - /* and keep the mutex held until the new chanctx is on the list */ - list_add_rcu(&ctx->list, &local->chanctx_list); + return 0; +} +static struct ieee80211_chanctx * +ieee80211_new_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) +{ + struct ieee80211_chanctx *ctx; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + ctx = ieee80211_alloc_chanctx(local, chandef, mode); + if (!ctx) + return ERR_PTR(-ENOMEM); + + err = ieee80211_add_chanctx(local, ctx); + if (err) { + kfree(ctx); + return ERR_PTR(err); + } + + list_add_rcu(&ctx->list, &local->chanctx_list); return ctx; } -static void ieee80211_free_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) +static void ieee80211_del_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { - bool check_single_channel = false; lockdep_assert_held(&local->chanctx_mtx); - WARN_ON_ONCE(ctx->refcount != 0); - if (!local->use_chanctx) { struct cfg80211_chan_def *chandef = &local->_oper_chandef; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; @@ -276,48 +480,29 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, /* NOTE: Disabling radar is only valid here for * single channel context. To be sure, check it ... */ - if (local->hw.conf.radar_enabled) - check_single_channel = true; + WARN_ON(local->hw.conf.radar_enabled && + !list_empty(&local->chanctx_list)); + local->hw.conf.radar_enabled = false; - ieee80211_hw_config(local, 0); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { drv_remove_chanctx(local, ctx); } - list_del_rcu(&ctx->list); - kfree_rcu(ctx, rcu_head); - - /* throw a warning if this wasn't the only channel context. */ - WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); - ieee80211_recalc_idle(local); } -static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_chanctx *ctx) +static void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { - struct ieee80211_local *local = sdata->local; - int ret; - lockdep_assert_held(&local->chanctx_mtx); - ret = drv_assign_vif_chanctx(local, sdata, ctx); - if (ret) - return ret; - - rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf); - ctx->refcount++; - - ieee80211_recalc_txpower(sdata); - ieee80211_recalc_chanctx_min_def(local, ctx); - sdata->vif.bss_conf.idle = false; - - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0); - return 0; + list_del_rcu(&ctx->list); + ieee80211_del_chanctx(local, ctx); + kfree_rcu(ctx, rcu_head); } static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, @@ -378,30 +563,58 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); } -static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_chanctx *ctx) +static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chanctx *new_ctx) { struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *curr_ctx = NULL; + int ret = 0; - lockdep_assert_held(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); - ctx->refcount--; - rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + if (conf) { + curr_ctx = container_of(conf, struct ieee80211_chanctx, conf); - sdata->vif.bss_conf.idle = true; + drv_unassign_vif_chanctx(local, sdata, curr_ctx); + conf = NULL; + list_del(&sdata->assigned_chanctx_list); + } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + if (new_ctx) { + ret = drv_assign_vif_chanctx(local, sdata, new_ctx); + if (ret) + goto out; + + conf = &new_ctx->conf; + list_add(&sdata->assigned_chanctx_list, + &new_ctx->assigned_vifs); + } + +out: + rcu_assign_pointer(sdata->vif.chanctx_conf, conf); - drv_unassign_vif_chanctx(local, sdata, ctx); + sdata->vif.bss_conf.idle = !conf; - if (ctx->refcount > 0) { - ieee80211_recalc_chanctx_chantype(sdata->local, ctx); - ieee80211_recalc_smps_chanctx(local, ctx); - ieee80211_recalc_radar_chanctx(local, ctx); - ieee80211_recalc_chanctx_min_def(local, ctx); + if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) { + ieee80211_recalc_chanctx_chantype(local, curr_ctx); + ieee80211_recalc_smps_chanctx(local, curr_ctx); + ieee80211_recalc_radar_chanctx(local, curr_ctx); + ieee80211_recalc_chanctx_min_def(local, curr_ctx); } + + if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { + ieee80211_recalc_txpower(sdata); + ieee80211_recalc_chanctx_min_def(local, new_ctx); + } + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_IDLE); + + return ret; } static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) @@ -419,8 +632,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ctx = container_of(conf, struct ieee80211_chanctx, conf); - ieee80211_unassign_vif_chanctx(sdata, ctx); - if (ctx->refcount == 0) + if (sdata->reserved_chanctx) + ieee80211_vif_unreserve_chanctx(sdata); + + ieee80211_assign_vif_chanctx(sdata, NULL); + if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); } @@ -486,6 +702,13 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, rx_chains_static = max(rx_chains_static, needed_static); rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic); } + + /* Disable SMPS for the monitor interface */ + sdata = rcu_dereference(local->monitor_sdata); + if (sdata && + rcu_access_pointer(sdata->vif.chanctx_conf) == &chanctx->conf) + rx_chains_dynamic = rx_chains_static = local->rx_chains; + rcu_read_unlock(); if (!local->use_chanctx) { @@ -513,6 +736,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *ctx; + u8 radar_detect_width = 0; int ret; lockdep_assert_held(&local->mtx); @@ -520,6 +744,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); mutex_lock(&local->chanctx_mtx); + + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, + chandef, + sdata->wdev.iftype); + if (ret < 0) + goto out; + if (ret > 0) + radar_detect_width = BIT(chandef->width); + + sdata->radar_required = ret; + + ret = ieee80211_check_combinations(sdata, chandef, mode, + radar_detect_width); + if (ret < 0) + goto out; + __ieee80211_vif_release_channel(sdata); ctx = ieee80211_find_chanctx(local, chandef, mode); @@ -535,7 +775,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, ret = ieee80211_assign_vif_chanctx(sdata, ctx); if (ret) { /* if assign fails refcount stays the same */ - if (ctx->refcount == 0) + if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); goto out; } @@ -547,15 +787,47 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, return ret; } +static int __ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chanctx *ctx, + u32 *changed) +{ + struct ieee80211_local *local = sdata->local; + const struct cfg80211_chan_def *chandef = &sdata->csa_chandef; + u32 chanctx_changed = 0; + + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + if (ieee80211_chanctx_refcount(local, ctx) != 1) + return -EINVAL; + + if (sdata->vif.bss_conf.chandef.width != chandef->width) { + chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH; + *changed |= BSS_CHANGED_BANDWIDTH; + } + + sdata->vif.bss_conf.chandef = *chandef; + ctx->conf.def = *chandef; + + chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL; + drv_change_chanctx(local, ctx, chanctx_changed); + + ieee80211_recalc_chanctx_chantype(local, ctx); + ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); + + return 0; +} + int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, u32 *changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; - const struct cfg80211_chan_def *chandef = &sdata->csa_chandef; int ret; - u32 chanctx_changed = 0; lockdep_assert_held(&local->mtx); @@ -563,11 +835,94 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!sdata->vif.csa_active)) return -EINVAL; - if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + ret = -EINVAL; + goto out; + } + + ctx = container_of(conf, struct ieee80211_chanctx, conf); + + ret = __ieee80211_vif_change_channel(sdata, ctx, changed); + out: + mutex_unlock(&local->chanctx_mtx); + return ret; +} + +static void +__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local __maybe_unused = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_chanctx_conf *conf; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) + return; + + lockdep_assert_held(&local->mtx); + + /* Check that conf exists, even when clearing this function + * must be called with the AP's channel context still there + * as it would otherwise cause VLANs to have an invalid + * channel context pointer for a while, possibly pointing + * to a channel context that has already been freed. + */ + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + WARN_ON(!conf); + + if (clear) + conf = NULL; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, conf); +} + +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->chanctx_mtx); + + __ieee80211_vif_copy_chanctx_to_vlans(sdata, clear); + + mutex_unlock(&local->chanctx_mtx); +} + +int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_chanctx *ctx = sdata->reserved_chanctx; + + lockdep_assert_held(&sdata->local->chanctx_mtx); + + if (WARN_ON(!ctx)) return -EINVAL; + list_del(&sdata->reserved_chanctx_list); + sdata->reserved_chanctx = NULL; + + if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) + ieee80211_free_chanctx(sdata->local, ctx); + + return 0; +} + +int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode, + bool radar_required) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *new_ctx, *curr_ctx; + int ret = 0; + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) { @@ -575,30 +930,108 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, goto out; } - ctx = container_of(conf, struct ieee80211_chanctx, conf); - if (ctx->refcount != 1) { + curr_ctx = container_of(conf, struct ieee80211_chanctx, conf); + + new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode); + if (!new_ctx) { + if (ieee80211_chanctx_refcount(local, curr_ctx) == 1 && + (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) { + /* if we're the only users of the chanctx and + * the driver supports changing a running + * context, reserve our current context + */ + new_ctx = curr_ctx; + } else if (ieee80211_can_create_new_chanctx(local)) { + /* create a new context and reserve it */ + new_ctx = ieee80211_new_chanctx(local, chandef, mode); + if (IS_ERR(new_ctx)) { + ret = PTR_ERR(new_ctx); + goto out; + } + } else { + ret = -EBUSY; + goto out; + } + } + + list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs); + sdata->reserved_chanctx = new_ctx; + sdata->reserved_chandef = *chandef; + sdata->reserved_radar_required = radar_required; +out: + mutex_unlock(&local->chanctx_mtx); + return ret; +} + +int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata, + u32 *changed) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *old_ctx; + struct ieee80211_chanctx_conf *conf; + int ret; + u32 tmp_changed = *changed; + + /* TODO: need to recheck if the chandef is usable etc.? */ + + lockdep_assert_held(&local->mtx); + + mutex_lock(&local->chanctx_mtx); + + ctx = sdata->reserved_chanctx; + if (WARN_ON(!ctx)) { ret = -EINVAL; goto out; } - if (sdata->vif.bss_conf.chandef.width != chandef->width) { - chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH; - *changed |= BSS_CHANGED_BANDWIDTH; + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + ret = -EINVAL; + goto out; } - sdata->vif.bss_conf.chandef = *chandef; - ctx->conf.def = *chandef; + old_ctx = container_of(conf, struct ieee80211_chanctx, conf); - chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL; - drv_change_chanctx(local, ctx, chanctx_changed); + if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width) + tmp_changed |= BSS_CHANGED_BANDWIDTH; + + sdata->vif.bss_conf.chandef = sdata->reserved_chandef; + + /* unref our reservation */ + sdata->reserved_chanctx = NULL; + sdata->radar_required = sdata->reserved_radar_required; + list_del(&sdata->reserved_chanctx_list); + + if (old_ctx == ctx) { + /* This is our own context, just change it */ + ret = __ieee80211_vif_change_channel(sdata, old_ctx, + &tmp_changed); + if (ret) + goto out; + } else { + ret = ieee80211_assign_vif_chanctx(sdata, ctx); + if (ieee80211_chanctx_refcount(local, old_ctx) == 0) + ieee80211_free_chanctx(local, old_ctx); + if (ret) { + /* if assign fails refcount stays the same */ + if (ieee80211_chanctx_refcount(local, ctx) == 0) + ieee80211_free_chanctx(local, ctx); + goto out; + } + + if (sdata->vif.type == NL80211_IFTYPE_AP) + __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + } + + *changed = tmp_changed; ieee80211_recalc_chanctx_chantype(local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); ieee80211_recalc_chanctx_min_def(local, ctx); - - ret = 0; - out: +out: mutex_unlock(&local->chanctx_mtx); return ret; } @@ -682,40 +1115,6 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } -void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, - bool clear) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_sub_if_data *vlan; - struct ieee80211_chanctx_conf *conf; - - ASSERT_RTNL(); - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) - return; - - mutex_lock(&local->chanctx_mtx); - - /* - * Check that conf exists, even when clearing this function - * must be called with the AP's channel context still there - * as it would otherwise cause VLANs to have an invalid - * channel context pointer for a while, possibly pointing - * to a channel context that has already been freed. - */ - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - WARN_ON(!conf); - - if (clear) - conf = NULL; - - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, conf); - - mutex_unlock(&local->chanctx_mtx); -} - void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index fa16e54980a..0e963bc1cea 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -128,7 +128,7 @@ static ssize_t sta_tx_latency_stat_write(struct file *file, if (!strcmp(buf, TX_LATENCY_DISABLED)) { if (!tx_latency) goto unlock; - rcu_assign_pointer(local->tx_latency, NULL); + RCU_INIT_POINTER(local->tx_latency, NULL); synchronize_rcu(); kfree(tx_latency); goto unlock; diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 214ed4ecd73..60c35afee29 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -1,6 +1,8 @@ #ifndef __MAC80211_DEBUGFS_H #define __MAC80211_DEBUGFS_H +#include "ieee80211_i.h" + #ifdef CONFIG_MAC80211_DEBUGFS void debugfs_hw_add(struct ieee80211_local *local); int __printf(4, 5) mac80211_format_buffer(char __user *userbuf, size_t count, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index ebf80f3abd8..e205ebabfa5 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read( ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write( ret = -ENODEV; rtnl_lock(); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*write)(sdata, buf, count); + ret = (*write)(sdata, buf, count); rtnl_unlock(); return ret; @@ -358,6 +356,18 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( } IEEE80211_IF_FILE_W(tkip_mic_test); +static ssize_t ieee80211_if_parse_beacon_loss( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + if (!ieee80211_sdata_running(sdata) || !sdata->vif.bss_conf.assoc) + return -ENOTCONN; + + ieee80211_beacon_loss(&sdata->vif); + + return buflen; +} +IEEE80211_IF_FILE_W(beacon_loss); + static ssize_t ieee80211_if_fmt_uapsd_queues( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { @@ -569,6 +579,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(beacon_timeout); DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); + DEBUGFS_ADD_MODE(beacon_loss, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600); } diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 79025e79f4d..9f5501a9a79 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -3,6 +3,8 @@ #ifndef __IEEE80211_DEBUGFS_NETDEV_H #define __IEEE80211_DEBUGFS_NETDEV_H +#include "ieee80211_i.h" + #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ef8b385eff0..bd782dcffcc 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,11 +5,11 @@ #include "ieee80211_i.h" #include "trace.h" -static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) +static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) { - WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), - "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); + return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), + "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); } static inline struct ieee80211_sub_if_data * @@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_change_interface(local, sdata, type, p2p); ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); @@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); @@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, sdata->vif.type == NL80211_IFTYPE_MONITOR)) return; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) @@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); @@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, ista = &sta->sta; sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) @@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_hw_scan(local, sdata); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); @@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_cancel_hw_scan(local, sdata); local->ops->cancel_hw_scan(&local->hw, &sdata->vif); @@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_start(local, sdata); ret = local->ops->sched_scan_start(&local->hw, &sdata->vif, @@ -354,16 +362,21 @@ drv_sched_scan_start(struct ieee80211_local *local, return ret; } -static inline void drv_sched_scan_stop(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +static inline int drv_sched_scan_stop(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { + int ret; + might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_stop(local, sdata); - local->ops->sched_scan_stop(&local->hw, &sdata->vif); - trace_drv_return_void(local); + ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif); + trace_drv_return_int(local, ret); + + return ret; } static inline void drv_sw_scan_start(struct ieee80211_local *local) @@ -458,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sta *sta) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) @@ -475,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) @@ -493,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) @@ -511,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; if (local->ops->sta_add_debugfs) local->ops->sta_add_debugfs(&local->hw, &sdata->vif, @@ -541,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); if (local->ops->sta_pre_rcu_remove) @@ -562,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state); if (local->ops->sta_state) { @@ -586,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, struct ieee80211_sta *sta, u32 changed) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && (sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -608,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) @@ -625,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return ret; trace_drv_get_tsf(local, sdata); if (local->ops->get_tsf) @@ -640,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_tsf(local, sdata, tsf); if (local->ops->set_tsf) @@ -653,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_reset_tsf(local, sdata); if (local->ops->reset_tsf) @@ -685,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); @@ -722,13 +747,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) } static inline void drv_flush(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { + struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + might_sleep(); + if (sdata && !check_sdata_in_driver(sdata)) + return; + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, queues, drop); + local->ops->flush(&local->hw, vif, queues, drop); trace_drv_return_void(local); } @@ -844,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_bitrate_mask(local, sdata, mask); if (local->ops->set_bitrate_mask) @@ -859,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_rekey_data(local, sdata, data); if (local->ops->set_rekey_data) @@ -927,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); trace_drv_mgd_prepare_tx(local, sdata); @@ -954,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { + if (WARN_ON(!ctx->driver_present)) + return; + trace_drv_remove_chanctx(local, ctx); if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); @@ -979,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_assign_vif_chanctx(local, sdata, ctx); if (local->ops->assign_vif_chanctx) { @@ -997,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_unassign_vif_chanctx(local, sdata, ctx); if (local->ops->unassign_vif_chanctx) { @@ -1009,12 +1048,66 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int +drv_switch_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode) +{ + int ret = 0; + int i; + + if (!local->ops->switch_vif_chanctx) + return -EOPNOTSUPP; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + WARN_ON_ONCE(!old_ctx->driver_present); + WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS && + new_ctx->driver_present) || + (mode == CHANCTX_SWMODE_REASSIGN_VIF && + !new_ctx->driver_present)); + } + + trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode); + ret = local->ops->switch_vif_chanctx(&local->hw, + vifs, n_vifs, mode); + trace_drv_return_int(local, ret); + + if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) { + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + new_ctx->driver_present = true; + old_ctx->driver_present = false; + } + } + + return ret; +} + static inline int drv_start_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf); if (local->ops->start_ap) @@ -1026,7 +1119,8 @@ static inline int drv_start_ap(struct ieee80211_local *local, static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_stop_ap(local, sdata); if (local->ops->stop_ap) @@ -1049,7 +1143,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(key_idx < -1 || key_idx > 3); @@ -1091,7 +1186,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local, int ret = 0; might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf); if (local->ops->join_ibss) @@ -1104,7 +1200,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_leave_ibss(local, sdata); if (local->ops->leave_ibss) @@ -1112,4 +1209,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, + struct ieee80211_sta *sta) +{ + u32 ret = 0; + + trace_drv_get_expected_throughput(sta); + if (local->ops->get_expected_throughput) + ret = local->ops->get_expected_throughput(sta); + trace_drv_return_u32(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index dc3c28002e3..15702ff64a4 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -31,6 +31,18 @@ static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa, } } +static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_sta_ht_cap *ht_cap, + u16 flag) +{ + __le16 le_flag = cpu_to_le16(flag); + + if ((ht_capa_mask->cap_info & le_flag) && + (ht_capa->cap_info & le_flag)) + ht_cap->cap |= flag; +} + void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap) { @@ -59,7 +71,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, smask = (u8 *)(&ht_capa_mask->mcs.rx_mask); /* NOTE: If you add more over-rides here, update register_hw - * ht_capa_mod_msk logic in main.c as well. + * ht_capa_mod_mask logic in main.c as well. * And, if this method can ever change ht_cap.ht_supported, fix * the check in ieee80211_add_ht_ie. */ @@ -86,6 +98,14 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); + /* Allow user to disable LDPC */ + __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, + IEEE80211_HT_CAP_LDPC_CODING); + + /* Allow user to enable 40 MHz intolerant bit. */ + __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, + IEEE80211_HT_CAP_40MHZ_INTOLERANT); + /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR) { @@ -466,7 +486,9 @@ void ieee80211_request_smps_ap_work(struct work_struct *work) u.ap.request_smps_work); sdata_lock(sdata); - __ieee80211_request_smps_ap(sdata, sdata->u.ap.driver_smps_mode); + if (sdata_dereference(sdata->u.ap.beacon, sdata)) + __ieee80211_request_smps_ap(sdata, + sdata->u.ap.driver_smps_mode); sdata_unlock(sdata); } @@ -480,8 +502,6 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, return; if (vif->type == NL80211_IFTYPE_STATION) { - if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) - smps_mode = IEEE80211_SMPS_AUTOMATIC; if (sdata->u.mgd.driver_smps_mode == smps_mode) return; sdata->u.mgd.driver_smps_mode = smps_mode; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 9c84b75f3de..18ee0a256b1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -143,7 +143,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = csa_settings->block_tx ? 1 : 0; *pos++ = ieee80211_frequency_to_channel( csa_settings->chandef.chan->center_freq); - sdata->csa_counter_offset_beacon = (pos - presp->head); + sdata->csa_counter_offset_beacon[0] = (pos - presp->head); *pos++ = csa_settings->count; } @@ -228,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; enum nl80211_bss_scan_width scan_width; bool have_higher_than_11mbit; - bool radar_required = false; + bool radar_required; int err; sdata_assert_lock(sdata); @@ -253,7 +253,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, presp = rcu_dereference_protected(ifibss->presp, lockdep_is_held(&sdata->wdev.mtx)); - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); @@ -262,7 +262,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* make a copy of the chandef, it could be modified below. */ chandef = *req_chandef; chan = chandef.chan; - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10 || chandef.width == NL80211_CHAN_WIDTH_20_NOHT || @@ -274,7 +275,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; /* check again for downgraded chandef */ - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; @@ -282,16 +284,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &chandef); - if (err > 0) { - if (!ifibss->userspace_handles_dfs) { - sdata_info(sdata, - "Failed to join IBSS, DFS channel without control program\n"); - return; - } - radar_required = true; + &chandef, NL80211_IFTYPE_ADHOC); + if (err < 0) { + sdata_info(sdata, + "Failed to join IBSS, invalid chandef\n"); + return; + } + if (err > 0 && !ifibss->userspace_handles_dfs) { + sdata_info(sdata, + "Failed to join IBSS, DFS channel without control program\n"); + return; } + radar_required = err; + mutex_lock(&local->mtx); if (ieee80211_vif_use_channel(sdata, &chandef, ifibss->fixed_channel ? @@ -684,12 +690,9 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) struct cfg80211_bss *cbss; struct beacon_data *presp; struct sta_info *sta; - int active_ibss; u16 capability; - active_ibss = ieee80211_sta_active_ibss(sdata); - - if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { + if (!is_zero_ether_addr(ifibss->bssid)) { capability = WLAN_CAPABILITY_IBSS; if (ifibss->privacy) @@ -773,7 +776,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) * unavailable. */ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &ifibss->chandef); + &ifibss->chandef, + NL80211_IFTYPE_ADHOC); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef, GFP_ATOMIC); @@ -859,7 +863,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, goto disconnect; } - if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef)) { + if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", ifibss->bssid, @@ -871,17 +876,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_ADHOC); if (err < 0) goto disconnect; - if (err) { + if (err > 0 && !ifibss->userspace_handles_dfs) { /* IBSS-DFS only allowed with a control program */ - if (!ifibss->userspace_handles_dfs) - goto disconnect; - - params.radar_required = true; + goto disconnect; } + params.radar_required = err; + if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { ibss_dbg(sdata, @@ -989,7 +994,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; - int freq; struct cfg80211_bss *cbss; struct ieee80211_bss *bss; struct sta_info *sta; @@ -1001,15 +1005,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool rates_updated = false; - if (elems->ds_params) - freq = ieee80211_channel_to_frequency(elems->ds_params[0], - band); - else - freq = rx_status->freq; - - channel = ieee80211_get_channel(local->hw.wiphy, freq); - - if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); + if (!channel) return; if (sdata->vif.type == NL80211_IFTYPE_ADHOC && @@ -1642,7 +1639,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, u32 changed = 0; u32 rate_flags; struct ieee80211_supported_band *sband; + enum ieee80211_chanctx_mode chanmode; + struct ieee80211_local *local = sdata->local; + int radar_detect_width = 0; int i; + int ret; + + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, + ¶ms->chandef, + sdata->wdev.iftype); + if (ret < 0) + return ret; + + if (ret > 0) { + if (!params->userspace_handles_dfs) + return -EINVAL; + radar_detect_width = BIT(params->chandef.width); + } + + chanmode = (params->channel_fixed && !ret) ? + IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; + + mutex_lock(&local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, ¶ms->chandef, chanmode, + radar_detect_width); + mutex_unlock(&local->chanctx_mtx); + if (ret < 0) + return ret; if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); @@ -1654,10 +1677,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.control_port = params->control_port; sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs; sdata->u.ibss.basic_rates = params->basic_rates; + sdata->u.ibss.last_scan_completed = jiffies; /* fix basic_rates if channel does not support these rates */ rate_flags = ieee80211_chandef_rate_flags(¶ms->chandef); - sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band]; + sband = local->hw.wiphy->bands[params->chandef.chan->band]; for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) sdata->u.ibss.basic_rates &= ~BIT(i); @@ -1706,9 +1730,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); sdata->smps_mode = IEEE80211_SMPS_OFF; - sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->needed_rx_chains = local->rx_chains; - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + ieee80211_queue_work(&local->hw, &sdata->work); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0014b5396ce..ac9836e0aab 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -260,7 +260,7 @@ struct ieee80211_if_ap { /* to be used after channel switch. */ struct cfg80211_beacon_data *next_beacon; - struct list_head vlans; + struct list_head vlans; /* write-protected with RTNL and local->mtx */ struct ps_data ps; atomic_t num_mcast_sta; /* number of stations receiving multicast */ @@ -276,7 +276,7 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - struct list_head list; + struct list_head list; /* write-protected with RTNL and local->mtx */ /* used for all tx if the VLAN is configured to 4-addr mode */ struct sta_info __rcu *sta; @@ -317,6 +317,7 @@ struct ieee80211_roc_work { bool started, abort, hw_begun, notified; bool to_be_freed; + bool on_channel; unsigned long hw_start_time; @@ -691,8 +692,10 @@ struct ieee80211_chanctx { struct list_head list; struct rcu_head rcu_head; + struct list_head assigned_vifs; + struct list_head reserved_vifs; + enum ieee80211_chanctx_mode mode; - int refcount; bool driver_present; struct ieee80211_chanctx_conf conf; @@ -751,11 +754,21 @@ struct ieee80211_sub_if_data { struct mac80211_qos_map __rcu *qos_map; struct work_struct csa_finalize_work; - int csa_counter_offset_beacon; - int csa_counter_offset_presp; + u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM]; + u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM]; bool csa_radar_required; + bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ struct cfg80211_chan_def csa_chandef; + struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ + struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ + + /* context reservation -- protected with chanctx_mtx */ + struct ieee80211_chanctx *reserved_chanctx; + struct cfg80211_chan_def reserved_chandef; + bool reserved_radar_required; + u8 csa_current_counter; + /* used to reconfigure hardware SM PS */ struct work_struct recalc_smps; @@ -1242,6 +1255,8 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *p2p_sdata; + struct napi_struct *napi; + /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; struct cfg80211_chan_def monitor_chandef; @@ -1389,6 +1404,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, __le16 fc, bool acked); +void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); /* IBSS code */ @@ -1445,6 +1461,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ @@ -1459,6 +1476,7 @@ void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* channel switch handling */ +bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local); void ieee80211_csa_finalize_work(struct work_struct *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); @@ -1698,14 +1716,8 @@ void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); -void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data); -static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) -{ - ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); -} +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); @@ -1774,6 +1786,16 @@ ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); int __must_check +ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode, + bool radar_required); +int __must_check +ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata, + u32 *changed); +int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata); + +int __must_check ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, u32 *changed); @@ -1785,6 +1807,8 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, bool clear); +int ieee80211_chanctx_refcount(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); @@ -1808,6 +1832,20 @@ int ieee80211_cs_headroom(struct ieee80211_local *local, enum nl80211_iftype iftype); void ieee80211_recalc_dtim(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect); +int ieee80211_max_num_channels(struct ieee80211_local *local); + +/* TDLS */ +int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + const u8 *extra_ies, size_t extra_ies_len); +int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, enum nl80211_tdls_operation oper); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8880bc8fce0..81a8e2a0b6a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -101,9 +101,8 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local) static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, bool force_active) { - bool working = false, scanning, active; + bool working, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; - struct ieee80211_roc_work *roc; lockdep_assert_held(&local->mtx); @@ -111,12 +110,8 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, !list_empty(&local->chanctx_list) || local->monitors; - if (!local->ops->remain_on_channel) { - list_for_each_entry(roc, &local->roc_list, list) { - working = true; - break; - } - } + working = !local->ops->remain_on_channel && + !list_empty(&local->roc_list); scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); @@ -255,6 +250,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *nsdata; + int ret; ASSERT_RTNL(); @@ -305,7 +301,10 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, } } - return 0; + mutex_lock(&local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&local->chanctx_mtx); + return ret; } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, @@ -400,6 +399,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) sdata->vif.type = NL80211_IFTYPE_MONITOR; snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); + sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -418,20 +418,24 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + mutex_lock(&local->iflist_mtx); + rcu_assign_pointer(local->monitor_sdata, sdata); + mutex_unlock(&local->iflist_mtx); + mutex_lock(&local->mtx); ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); mutex_unlock(&local->mtx); if (ret) { + mutex_lock(&local->iflist_mtx); + RCU_INIT_POINTER(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); drv_remove_interface(local, sdata); kfree(sdata); return ret; } - mutex_lock(&local->iflist_mtx); - rcu_assign_pointer(local->monitor_sdata, sdata); - mutex_unlock(&local->iflist_mtx); - return 0; } @@ -453,7 +457,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); @@ -493,7 +497,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (!sdata->bss) return -ENOLINK; + mutex_lock(&local->mtx); list_add(&sdata->u.vlan.list, &sdata->bss->vlans); + mutex_unlock(&local->mtx); master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -723,8 +729,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_stop(local); err_del_bss: sdata->bss = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); + mutex_unlock(&local->mtx); + } /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; @@ -770,12 +779,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_roc_purge(local, sdata); - if (sdata->vif.type == NL80211_IFTYPE_STATION) + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: ieee80211_mgd_stop(sdata); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + break; + case NL80211_IFTYPE_ADHOC: ieee80211_ibss_stop(sdata); - + break; + case NL80211_IFTYPE_AP: + cancel_work_sync(&sdata->u.ap.request_smps_work); + break; + default: + break; + } /* * Remove all stations associated with this interface. @@ -823,8 +839,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); sdata_lock(sdata); + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); sdata_unlock(sdata); + cancel_work_sync(&sdata->csa_finalize_work); cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); @@ -869,8 +892,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: + mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); - rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + mutex_unlock(&local->mtx); + RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -889,7 +914,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ - rcu_assign_pointer(local->p2p_sdata, NULL); + RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ default: cancel_work_sync(&sdata->work); @@ -1048,7 +1073,8 @@ static void ieee80211_uninit(struct net_device *dev) static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, + select_queue_fallback_t fallback) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@ -1066,7 +1092,8 @@ static const struct net_device_ops ieee80211_dataif_ops = { static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, + select_queue_fallback_t fallback) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1259,6 +1286,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + sdata->vif.bss_conf.idle = true; sdata->noack_map = 0; @@ -1272,6 +1300,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work); + INIT_LIST_HEAD(&sdata->assigned_chanctx_list); + INIT_LIST_HEAD(&sdata->reserved_chanctx_list); switch (type) { case NL80211_IFTYPE_P2P_GO: @@ -1766,20 +1796,19 @@ static int netdev_notify(struct notifier_block *nb, struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) - return 0; + return NOTIFY_DONE; if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy) - return 0; + return NOTIFY_DONE; if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) - return 0; + return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->name, dev->name, IFNAMSIZ); - ieee80211_debugfs_rename_netdev(sdata); - return 0; + + return NOTIFY_OK; } static struct notifier_block mac80211_netdev_notifier = { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6ff65a1ebaa..16d97f044a2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -325,7 +325,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, struct ieee80211_key *key; int i, j, err; - BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); + if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)) + return ERR_PTR(-EINVAL); key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); if (!key) @@ -481,8 +482,8 @@ int ieee80211_key_link(struct ieee80211_key *key, int idx, ret; bool pairwise; - BUG_ON(!sdata); - BUG_ON(!key); + if (WARN_ON(!sdata || !key)) + return -EINVAL; pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1f7d8422d62..d17c26d6e36 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -148,6 +148,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!rcu_access_pointer(sdata->vif.chanctx_conf)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + continue; power = min(power, sdata->vif.bss_conf.txpower); } rcu_read_unlock(); @@ -199,7 +201,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; - if (!changed) + if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return; drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed); @@ -338,7 +340,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, sdata_unlock(sdata); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -369,7 +371,7 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb, drv_ipv6_addr_change(local, sdata, idev); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -444,7 +446,9 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40), + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_40MHZ_INTOLERANT), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, @@ -952,6 +956,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; + local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; @@ -1076,6 +1082,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_register_hw); +void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, + struct net_device *napi_dev, + int (*poll)(struct napi_struct *, int), + int weight) +{ + struct ieee80211_local *local = hw_to_local(hw); + + netif_napi_add(napi_dev, napi, poll, weight); + local->napi = napi; +} +EXPORT_SYMBOL_GPL(ieee80211_napi_add); + void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f70e9cd1055..6495a3f0428 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -366,20 +366,15 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) return 0; /* find RSN IE */ - data = ifmsh->ie; - while (data < ifmsh->ie + ifmsh->ie_len) { - if (*data == WLAN_EID_RSN) { - len = data[1] + 2; - break; - } - data++; - } + data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len); + if (!data) + return 0; - if (len) { - if (skb_tailroom(skb) < len) - return -ENOMEM; - memcpy(skb_put(skb, len), data, len); - } + len = data[1] + 2; + + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); return 0; } @@ -684,7 +679,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); - sdata->csa_counter_offset_beacon = hdr_len + 6; + sdata->csa_counter_offset_beacon[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; *pos++ = 6; @@ -829,7 +824,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); - rcu_assign_pointer(ifmsh->beacon, NULL); + RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); /* flush STAs and mpaths on this iface */ @@ -903,14 +898,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; - if (err) { - params.radar_required = true; + if (err > 0) /* TODO: DFS not (yet) supported */ return false; - } + + params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { @@ -1068,7 +1064,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) /* Remove the CSA and MCSP elements from the beacon */ tmp_csa_settings = rcu_dereference(ifmsh->csa); - rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); if (tmp_csa_settings) kfree_rcu(tmp_csa_settings, rcu_head); ret = ieee80211_mesh_rebuild_beacon(sdata); @@ -1102,7 +1098,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, ret = ieee80211_mesh_rebuild_beacon(sdata); if (ret) { tmp_csa_settings = rcu_dereference(ifmsh->csa); - rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); kfree_rcu(tmp_csa_settings, rcu_head); return ret; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f9514685d45..94758b9c9ed 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -37,7 +37,7 @@ static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) +static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -544,9 +544,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { - target_sn = ++ifmsh->sn; + ++ifmsh->sn; ifmsh->last_sn_update = jiffies; } + target_sn = ifmsh->sn; } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 7d050ed6fe5..cf032a8db9d 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -287,8 +287,10 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct sk_buff_head failq; unsigned long flags; - BUG_ON(gate_mpath == from_mpath); - BUG_ON(!gate_mpath->next_hop); + if (WARN_ON(gate_mpath == from_mpath)) + return; + if (WARN_ON(!gate_mpath->next_hop)) + return; __skb_queue_head_init(&failq); diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 2802f9d9279..ad8b377b4b9 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 2bc5dc25d5a..09625d6205c 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -171,7 +171,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); - BUG_ON(!rcu_read_lock_held()); + WARN_ON(!rcu_read_lock_held()); cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h index 3b848dad958..0e4886f881f 100644 --- a/net/mac80211/michael.h +++ b/net/mac80211/michael.h @@ -11,6 +11,7 @@ #define MICHAEL_H #include <linux/types.h> +#include <linux/ieee80211.h> #define MICHAEL_MIC_LEN 8 diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 61604834b91..3345401be1b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -131,13 +131,13 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) if (unlikely(!sdata->u.mgd.associated)) return; + ifmgd->probe_send_count = 0; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) return; mod_timer(&sdata->u.mgd.conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); - - ifmgd->probe_send_count = 0; } static int ecw2cw(int ecw) @@ -222,6 +222,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, switch (vht_oper->chan_width) { case IEEE80211_VHT_CHANWIDTH_USE_HT: vht_chandef.width = chandef->width; + vht_chandef.center_freq1 = chandef->center_freq1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: vht_chandef.width = NL80211_CHAN_WIDTH_80; @@ -271,6 +272,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; out: + /* + * When tracking the current AP, don't do any further checks if the + * new chandef is identical to the one we're currently using for the + * connection. This keeps us from playing ping-pong with regulatory, + * without it the following can happen (for example): + * - connect to an AP with 80 MHz, world regdom allows 80 MHz + * - AP advertises regdom US + * - CRDA loads regdom US with 80 MHz prohibited (old database) + * - the code below detects an unsupported channel, downgrades, and + * we disconnect from the AP in the caller + * - disconnect causes CRDA to reload world regdomain and the game + * starts anew. + * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) + * + * It seems possible that there are still scenarios with CSA or real + * bandwidth changes where a this could happen, but those cases are + * less common and wouldn't completely prevent using the AP. + */ + if (tracking && + cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) + return ret; + /* don't print the message below for VHT mismatch if VHT is disabled */ if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; @@ -952,16 +975,23 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->csa_chandef.chan; + ieee80211_bss_info_change_notify(sdata, changed); + + mutex_lock(&local->mtx); + sdata->vif.csa_active = false; /* XXX: wait for a beacon first? */ - ieee80211_wake_queues_by_reason(&local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); - ieee80211_bss_info_change_notify(sdata, changed); - - out: - sdata->vif.csa_active = false; ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + + ieee80211_sta_reset_beacon_monitor(sdata); + ieee80211_sta_reset_conn_monitor(sdata); + +out: sdata_unlock(sdata); } @@ -1066,7 +1096,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { + if (ieee80211_chanctx_refcount(local, chanctx) > 1) { sdata_info(sdata, "channel switch with multiple interfaces on the same channel, disconnecting\n"); ieee80211_queue_work(&local->hw, @@ -1077,12 +1107,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->chanctx_mtx); sdata->csa_chandef = csa_ie.chandef; + + mutex_lock(&local->mtx); sdata->vif.csa_active = true; + sdata->csa_block_tx = csa_ie.mode; - if (csa_ie.mode) + if (sdata->csa_block_tx) ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -1794,6 +1828,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + + sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&local->mtx); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -2022,6 +2062,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get); static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -2035,10 +2076,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + + mutex_lock(&local->mtx); sdata->vif.csa_active = false; - ieee80211_wake_queues_by_reason(&sdata->local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -2249,6 +2294,62 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, /* ignore frame -- wait for timeout */ } +#define case_WLAN(type) \ + case WLAN_REASON_##type: return #type + +static const char *ieee80211_get_reason_code_string(u16 reason_code) +{ + switch (reason_code) { + case_WLAN(UNSPECIFIED); + case_WLAN(PREV_AUTH_NOT_VALID); + case_WLAN(DEAUTH_LEAVING); + case_WLAN(DISASSOC_DUE_TO_INACTIVITY); + case_WLAN(DISASSOC_AP_BUSY); + case_WLAN(CLASS2_FRAME_FROM_NONAUTH_STA); + case_WLAN(CLASS3_FRAME_FROM_NONASSOC_STA); + case_WLAN(DISASSOC_STA_HAS_LEFT); + case_WLAN(STA_REQ_ASSOC_WITHOUT_AUTH); + case_WLAN(DISASSOC_BAD_POWER); + case_WLAN(DISASSOC_BAD_SUPP_CHAN); + case_WLAN(INVALID_IE); + case_WLAN(MIC_FAILURE); + case_WLAN(4WAY_HANDSHAKE_TIMEOUT); + case_WLAN(GROUP_KEY_HANDSHAKE_TIMEOUT); + case_WLAN(IE_DIFFERENT); + case_WLAN(INVALID_GROUP_CIPHER); + case_WLAN(INVALID_PAIRWISE_CIPHER); + case_WLAN(INVALID_AKMP); + case_WLAN(UNSUPP_RSN_VERSION); + case_WLAN(INVALID_RSN_IE_CAP); + case_WLAN(IEEE8021X_FAILED); + case_WLAN(CIPHER_SUITE_REJECTED); + case_WLAN(DISASSOC_UNSPECIFIED_QOS); + case_WLAN(DISASSOC_QAP_NO_BANDWIDTH); + case_WLAN(DISASSOC_LOW_ACK); + case_WLAN(DISASSOC_QAP_EXCEED_TXOP); + case_WLAN(QSTA_LEAVE_QBSS); + case_WLAN(QSTA_NOT_USE); + case_WLAN(QSTA_REQUIRE_SETUP); + case_WLAN(QSTA_TIMEOUT); + case_WLAN(QSTA_CIPHER_NOT_SUPP); + case_WLAN(MESH_PEER_CANCELED); + case_WLAN(MESH_MAX_PEERS); + case_WLAN(MESH_CONFIG); + case_WLAN(MESH_CLOSE); + case_WLAN(MESH_MAX_RETRIES); + case_WLAN(MESH_CONFIRM_TIMEOUT); + case_WLAN(MESH_INVALID_GTK); + case_WLAN(MESH_INCONSISTENT_PARAM); + case_WLAN(MESH_INVALID_SECURITY); + case_WLAN(MESH_PATH_ERROR); + case_WLAN(MESH_PATH_NOFORWARD); + case_WLAN(MESH_PATH_DEST_UNREACHABLE); + case_WLAN(MAC_EXISTS_IN_MBSS); + case_WLAN(MESH_CHAN_REGULATORY); + case_WLAN(MESH_CHAN); + default: return "<unknown>"; + } +} static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) @@ -2270,8 +2371,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - sdata_info(sdata, "deauthenticated from %pM (Reason: %u)\n", - bssid, reason_code); + sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n", + bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -2704,28 +2805,20 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; - int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; sdata_assert_lock(sdata); - if (elems->ds_params) - freq = ieee80211_channel_to_frequency(elems->ds_params[0], - rx_status->band); - else - freq = rx_status->freq; - - channel = ieee80211_get_channel(local->hw.wiphy, freq); - - if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); + if (!channel) return; bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); if (bss) { - ieee80211_rx_bss_put(local, bss); sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; + ieee80211_rx_bss_put(local, bss); } } @@ -3475,6 +3568,9 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); @@ -3490,6 +3586,9 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); } @@ -3520,6 +3619,38 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) } #ifdef CONFIG_PM +void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + sdata_lock(sdata); + + if (ifmgd->auth_data || ifmgd->assoc_data) { + const u8 *bssid = ifmgd->auth_data ? + ifmgd->auth_data->bss->bssid : + ifmgd->assoc_data->bss->bssid; + + /* + * If we are trying to authenticate / associate while suspending, + * cfg80211 won't know and won't actually abort those attempts, + * thus we need to do that ourselves. + */ + ieee80211_send_deauth_disassoc(sdata, bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + false, frame_buf); + if (ifmgd->assoc_data) + ieee80211_destroy_assoc_data(sdata, false); + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, false); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); + } + + sdata_unlock(sdata); +} + void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -3604,7 +3735,7 @@ int ieee80211_max_network_latency(struct notifier_block *nb, ieee80211_recalc_ps(local, latency_usec); mutex_unlock(&local->iflist_mtx); - return 0; + return NOTIFY_OK; } static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, @@ -3792,6 +3923,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); + sta_info_free(local, new_sta); return -EINVAL; } rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); @@ -4337,37 +4469,41 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx = !req->local_state_change; - bool report_frame = false; - sdata_info(sdata, - "deauthenticating from %pM by local choice (reason=%d)\n", - req->bssid, req->reason_code); + if (ifmgd->auth_data && + ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) { + sdata_info(sdata, + "aborting authentication with %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); - if (ifmgd->auth_data) { drv_mgd_prepare_tx(sdata->local, sdata); ieee80211_send_deauth_disassoc(sdata, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); - report_frame = true; - goto out; + return 0; } if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { + sdata_info(sdata, + "deauthenticating from %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - report_frame = true; - } - - out: - if (report_frame) cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + return 0; + } - return 0; + return -ENOTCONN; } int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, @@ -4387,8 +4523,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return -ENOLINK; sdata_info(sdata, - "disassociating from %pM by local choice (reason=%d)\n", - req->bss->bssid, req->reason_code); + "disassociating from %pM by local choice (Reason: %u=%s)\n", + req->bss->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); memcpy(bssid, req->bss->bssid, ETH_ALEN); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0c2a29484c0..7a17decd27f 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) container_of(work, struct ieee80211_roc_work, work.work); struct ieee80211_sub_if_data *sdata = roc->sdata; struct ieee80211_local *local = sdata->local; - bool started; + bool started, on_channel; mutex_lock(&local->mtx); @@ -354,13 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (!roc->started) { struct ieee80211_roc_work *dep; - /* start this ROC */ + WARN_ON(local->use_chanctx); + + /* If actually operating on the desired channel (with at least + * 20 MHz channel width) don't stop all the operations but still + * treat it as though the ROC operation started properly, so + * other ROC operations won't interfere with this one. + */ + roc->on_channel = roc->chan == local->_oper_chandef.chan && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; - /* switch channel etc */ + /* start this ROC */ ieee80211_recalc_idle(local); - local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + if (!roc->on_channel) { + ieee80211_offchannel_stop_vifs(local); + + local->tmp_channel = roc->chan; + ieee80211_hw_config(local, 0); + } /* tell userspace or send frame */ ieee80211_handle_roc_started(roc); @@ -379,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; + on_channel = roc->on_channel; ieee80211_roc_notify_destroy(roc, !roc->abort); - if (started) { + if (started && !on_channel) { ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index af64fb8e8ad..d478b880a0a 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -100,10 +100,18 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata) || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (!ieee80211_sdata_running(sdata)) continue; + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + continue; + case NL80211_IFTYPE_STATION: + ieee80211_mgd_quiesce(sdata); + break; + default: + break; + } drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 26fd94fa0ae..1c1469c36dc 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -657,6 +657,17 @@ minstrel_free(void *priv) kfree(priv); } +static u32 minstrel_get_expected_throughput(void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + int idx = mi->max_tp_rate[0]; + + /* convert pkt per sec in kbps (1200 is the average pkt size used for + * computing cur_tp + */ + return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024; +} + const struct rate_control_ops mac80211_minstrel = { .name = "minstrel", .tx_status = minstrel_tx_status, @@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = { .add_sta_debugfs = minstrel_add_sta_debugfs, .remove_sta_debugfs = minstrel_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_get_expected_throughput, }; int __init diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index bccaf854a30..85c1e74b771 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -22,7 +22,7 @@ #define MCS_NBITS (AVG_PKT_SIZE << 3) /* Number of symbols for a packet with (bps) bits per symbol */ -#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) +#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) /* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ @@ -226,8 +226,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((prob * 1000) / nsecs); + /* prob is scaled - see MINSTREL_FRAC above */ + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } @@ -1031,6 +1032,22 @@ minstrel_ht_free(void *priv) mac80211_minstrel.free(priv); } +static u32 minstrel_ht_get_expected_throughput(void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + int i, j; + + if (!msp->is_ht) + return mac80211_minstrel.get_expected_throughput(priv_sta); + + i = mi->max_tp_rate / MCS_GROUP_RATES; + j = mi->max_tp_rate % MCS_GROUP_RATES; + + /* convert cur_tp from pkt per second in kbps */ + return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; +} + static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .tx_status = minstrel_ht_tx_status, @@ -1045,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .add_sta_debugfs = minstrel_ht_add_sta_debugfs, .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_ht_get_expected_throughput, }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 593062109c5..394e201cde6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -54,24 +54,25 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, return skb; } -static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len) +static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr; - - hdr = (void *)(skb->data); + struct ieee80211_hdr *hdr = (void *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC | RX_FLAG_AMPDU_IS_ZEROLEN)) - return 1; + return true; + if (unlikely(skb->len < 16 + present_fcs_len)) - return 1; + return true; + if (ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && !ieee80211_is_back_req(hdr->frame_control)) - return 1; - return 0; + return true; + + return false; } static int @@ -333,6 +334,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* in VHT, STBC is binary */ if (status->flag & RX_FLAG_STBC_MASK) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC; + if (status->vht_flag & RX_VHT_FLAG_BF) + *pos |= IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED; pos++; /* bandwidth */ if (status->vht_flag & RX_VHT_FLAG_80MHZ) @@ -1092,6 +1095,13 @@ static void sta_ps_end(struct sta_info *sta) sta->sta.addr, sta->sta.aid); if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + /* + * Clear the flag only if the other one is still set + * so that the TX path won't start TX'ing new frames + * directly ... In the case that the driver flag isn't + * set ieee80211_sta_ps_deliver_wakeup() will clear it. + */ + clear_sta_flag(sta, WLAN_STA_PS_STA); ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", sta->sta.addr, sta->sta.aid); return; @@ -1222,7 +1232,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { sta->last_rx_rate_idx = status->rate_idx; sta->last_rx_rate_flag = status->flag; sta->last_rx_rate_vht_flag = status->vht_flag; @@ -1238,6 +1249,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ieee80211_is_data(hdr->frame_control)) { sta->last_rx_rate_idx = status->rate_idx; sta->last_rx_rate_flag = status->flag; + sta->last_rx_rate_vht_flag = status->vht_flag; sta->last_rx_rate_vht_nss = status->vht_nss; } } @@ -1954,7 +1966,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_receive_skb(skb); + if (rx->local->napi) + napi_gro_receive(rx->local->napi, skb); + else + netif_receive_skb(skb); } if (xmit_skb) { @@ -3177,7 +3192,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, } /* - * This is the actual Rx frames handler. as it blongs to Rx path it must + * This is the actual Rx frames handler. as it belongs to Rx path it must * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 88c81616f8f..f40661eb75b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -309,7 +309,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; @@ -472,9 +472,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { u8 *ies; - local->hw_scan_ies_bufsize = 2 + IEEE80211_MAX_SSID_LEN + - local->scan_ies_len + - req->ie_len; + local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len; local->hw_scan_req = kmalloc( sizeof(*local->hw_scan_req) + req->n_channels * sizeof(req->channels[0]) + @@ -561,7 +559,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); } return rc; @@ -775,7 +773,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc; local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, req); if (rc) { @@ -979,8 +977,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; int ret, i, iebufsz; - iebufsz = 2 + IEEE80211_MAX_SSID_LEN + - local->scan_ies_len + req->ie_len; + iebufsz = local->scan_ies_len + req->ie_len; lockdep_assert_held(&local->mtx); @@ -1017,7 +1014,7 @@ out_free: if (ret) { /* Clean in case of failure after HW restart or upon resume. */ - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); local->sched_scan_req = NULL; } @@ -1058,9 +1055,11 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) /* We don't want to restart sched scan anymore. */ local->sched_scan_req = NULL; - if (rcu_access_pointer(local->sched_scan_sdata)) - drv_sched_scan_stop(local, sdata); - + if (rcu_access_pointer(local->sched_scan_sdata)) { + ret = drv_sched_scan_stop(local, sdata); + if (!ret) + rcu_assign_pointer(local->sched_scan_sdata, NULL); + } out: mutex_unlock(&local->mtx); @@ -1077,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_sched_scan_results); -void ieee80211_sched_scan_stopped_work(struct work_struct *work) +void ieee80211_sched_scan_end(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, - sched_scan_stopped_work); - mutex_lock(&local->mtx); if (!rcu_access_pointer(local->sched_scan_sdata)) { @@ -1090,7 +1085,7 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) return; } - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. */ local->sched_scan_req = NULL; @@ -1100,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) cfg80211_sched_scan_stopped(local->hw.wiphy); } +void ieee80211_sched_scan_stopped_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + sched_scan_stopped_work); + + ieee80211_sched_scan_end(local); +} + void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index decd30c1e29..a9b46d8ea22 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,7 +91,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } -static void cleanup_single_sta(struct sta_info *sta) +static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; struct tid_ampdu_tx *tid_tx; @@ -99,7 +99,8 @@ static void cleanup_single_sta(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct ps_data *ps; - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; @@ -109,6 +110,7 @@ static void cleanup_single_sta(struct sta_info *sta) return; clear_sta_flag(sta, WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); atomic_dec(&ps->num_sta_ps); sta_info_recalc_tim(sta); @@ -139,7 +141,14 @@ static void cleanup_single_sta(struct sta_info *sta) ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); kfree(tid_tx); } +} +static void cleanup_single_sta(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + + __cleanup_single_sta(sta); sta_info_free(local, sta); } @@ -231,6 +240,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } @@ -330,6 +340,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); spin_lock_init(&sta->lock); + spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -487,21 +498,26 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) goto out_err; } - /* notify driver */ - err = sta_info_insert_drv_state(local, sdata, sta); - if (err) - goto out_err; - local->num_sta++; local->sta_generation++; smp_mb(); + /* simplify things and don't accept BA sessions yet */ + set_sta_flag(sta, WLAN_STA_BLOCK_BA); + /* make the station visible */ sta_info_hash_add(local, sta); list_add_rcu(&sta->list, &local->sta_list); + /* notify driver */ + err = sta_info_insert_drv_state(local, sdata, sta); + if (err) + goto out_remove; + set_sta_flag(sta, WLAN_STA_INSERTED); + /* accept BA sessions now */ + clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_recalc_min_chandef(sdata); ieee80211_sta_debugfs_add(sta); @@ -522,6 +538,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) mesh_accept_plinks_update(sdata); return 0; + out_remove: + sta_info_hash_del(local, sta); + list_del_rcu(&sta->list); + local->num_sta--; + synchronize_net(); + __cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); rcu_read_lock(); @@ -531,7 +553,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; - int err = 0; + int err; might_sleep(); @@ -549,7 +571,6 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) return 0; out_free: - BUG_ON(!err); sta_info_free(local, sta); return err; } @@ -1071,10 +1092,14 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_find_sta); -static void clear_sta_ps_flags(void *_sta) +/* powersave support code */ +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { - struct sta_info *sta = _sta; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct sk_buff_head pending; + int filtered = 0, buffered = 0, ac; + unsigned long flags; struct ps_data *ps; if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -1085,20 +1110,6 @@ static void clear_sta_ps_flags(void *_sta) else return; - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); - if (test_and_clear_sta_flag(sta, WLAN_STA_PS_STA)) - atomic_dec(&ps->num_sta_ps); -} - -/* powersave support code */ -void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata = sta->sdata; - struct ieee80211_local *local = sdata->local; - struct sk_buff_head pending; - int filtered = 0, buffered = 0, ac; - unsigned long flags; - clear_sta_flag(sta, WLAN_STA_SP); BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); @@ -1109,6 +1120,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); + /* sync with ieee80211_tx_h_unicast_ps_buf */ + spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1127,10 +1140,16 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) buffered += tmp - count; } - ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + ieee80211_add_pending_skbs(local, &pending); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_STA); + spin_unlock(&sta->ps_lock); + + atomic_dec(&ps->num_sta_ps); /* This station just woke up and isn't aware of our SMPS state */ - if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, + if (!ieee80211_vif_is_mesh(&sdata->vif) && + !ieee80211_smps_is_restrictive(sta->known_smps_mode, sdata->smps_mode) && sta->known_smps_mode != sdata->bss->req_smps && sta_info_tx_streams(sta) != 1) { @@ -1188,6 +1207,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d4d85de0d75..4acc5fc402f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -268,6 +268,7 @@ struct ieee80211_tx_latency_stat { * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_lock: used for powersave (when mac80211 is the AP) related locking * @ps_tx_buf: buffers (per AC) of frames to transmit to this station * when it leaves power saving state or polls * @tx_filtered: buffers (per AC) of frames we already tried to @@ -357,10 +358,8 @@ struct sta_info { /* use the accessors defined below */ unsigned long _flags; - /* - * STA powersave frame queues, no more than the internal - * locking required. - */ + /* STA powersave lock and frame queues */ + spinlock_t ps_lock; struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e6e574a307c..ba29ebc8614 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, !is_multicast_ether_addr(hdr->addr1)) txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) txflags |= IEEE80211_RADIOTAP_F_TX_CTS; - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) txflags |= IEEE80211_RADIOTAP_F_TX_RTS; put_unaligned_le16(txflags, pos); @@ -542,6 +541,23 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, */ #define STA_LOST_PKT_THRESHOLD 50 +static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD) + return; + + cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, + sta->lost_packets, GFP_ATOMIC); + sta->lost_packets = 0; +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -618,6 +634,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sta, true, acked); if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && + (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; @@ -680,12 +697,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_ACK) { if (sta->lost_packets) sta->lost_packets = 0; - } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) { - cfg80211_cqm_pktloss_notify(sta->sdata->dev, - sta->sta.addr, - sta->lost_packets, - GFP_ATOMIC); - sta->lost_packets = 0; + } else { + ieee80211_lost_packet(sta, skb); } } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c new file mode 100644 index 00000000000..652813b2d3d --- /dev/null +++ b/net/mac80211/tdls.c @@ -0,0 +1,325 @@ +/* + * mac80211 TDLS handling code + * + * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2014, Intel Corporation + * + * This file is GPLv2 as found in COPYING. + */ + +#include <linux/ieee80211.h> +#include "ieee80211_i.h" + +static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +{ + u8 *pos = (void *)skb_put(skb, 7); + + *pos++ = WLAN_EID_EXT_CAPABILITY; + *pos++ = 5; /* len */ + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; +} + +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u16 capab; + + capab = 0; + if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) + return capab; + + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + return capab; +} + +static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr, + const u8 *peer, const u8 *bssid) +{ + struct ieee80211_tdls_lnkie *lnkid; + + lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); + + lnkid->ie_type = WLAN_EID_LINK_ID; + lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; + + memcpy(lnkid->bssid, bssid, ETH_ALEN); + memcpy(lnkid->init_sta, src_addr, ETH_ALEN); + memcpy(lnkid->resp_sta, peer, ETH_ALEN); +} + +static int +ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_tdls_data *tf; + + tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); + + memcpy(tf->da, peer, ETH_ALEN); + memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); + tf->ether_type = cpu_to_be16(ETH_P_TDLS); + tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_REQUEST; + + skb_put(skb, sizeof(tf->u.setup_req)); + tf->u.setup_req.dialog_token = dialog_token; + tf->u.setup_req.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_RESPONSE: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_RESPONSE; + + skb_put(skb, sizeof(tf->u.setup_resp)); + tf->u.setup_resp.status_code = cpu_to_le16(status_code); + tf->u.setup_resp.dialog_token = dialog_token; + tf->u.setup_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_CONFIRM: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_CONFIRM; + + skb_put(skb, sizeof(tf->u.setup_cfm)); + tf->u.setup_cfm.status_code = cpu_to_le16(status_code); + tf->u.setup_cfm.dialog_token = dialog_token; + break; + case WLAN_TDLS_TEARDOWN: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_TEARDOWN; + + skb_put(skb, sizeof(tf->u.teardown)); + tf->u.teardown.reason_code = cpu_to_le16(status_code); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; + + skb_put(skb, sizeof(tf->u.discover_req)); + tf->u.discover_req.dialog_token = dialog_token; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_mgmt *mgmt; + + mgmt = (void *)skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, peer, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + switch (action_code) { + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.u.tdls_discover_resp.action_code = + WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.u.tdls_discover_resp.dialog_token = + dialog_token; + mgmt->u.action.u.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + ieee80211_tdls_add_ext_capab(skb); + break; + default: + return -EINVAL; + } + + return 0; +} + +int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + const u8 *extra_ies, size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb = NULL; + bool send_direct; + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + /* make sure we are in managed mode, and associated */ + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !sdata->u.mgd.associated) + return -EINVAL; + + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", + action_code, peer); + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + max(sizeof(struct ieee80211_mgmt), + sizeof(struct ieee80211_tdls_data)) + + 50 + /* supported rates */ + 7 + /* ext capab */ + extra_ies_len + + sizeof(struct ieee80211_tdls_lnkie)); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, + action_code, dialog_token, + status_code, skb); + send_direct = false; + break; + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, + dialog_token, status_code, + skb); + send_direct = true; + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret < 0) + goto fail; + + if (extra_ies_len) + memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); + + /* the TDLS link IE is always added last */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + /* we are the initiator */ + ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, + sdata->u.mgd.bssid); + break; + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + /* we are the responder */ + ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, + sdata->u.mgd.bssid); + break; + default: + ret = -ENOTSUPP; + goto fail; + } + + if (send_direct) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + /* + * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise + * we should default to AC_VI. + */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + skb_set_queue_mapping(skb, IEEE80211_AC_BK); + skb->priority = 2; + break; + default: + skb_set_queue_mapping(skb, IEEE80211_AC_VI); + skb->priority = 5; + break; + } + + /* disable bottom halves when entering the Tx path */ + local_bh_disable(); + ret = ieee80211_subif_start_xmit(skb, dev); + local_bh_enable(); + + return ret; + +fail: + dev_kfree_skb(skb); + return ret; +} + +int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, enum nl80211_tdls_operation oper) +{ + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + + tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (!sta) { + rcu_read_unlock(); + return -ENOLINK; + } + + set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + break; + case NL80211_TDLS_DISABLE_LINK: + return sta_info_destroy_addr(sdata, peer); + case NL80211_TDLS_TEARDOWN: + case NL80211_TDLS_SETUP: + case NL80211_TDLS_DISCOVERY_REQ: + /* We don't support in-driver setup/teardown/discovery */ + return -ENOTSUPP; + default: + return -ENOTSUPP; + } + + return 0; +} diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a0b0aea7652..cfe1a0688b5 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -21,10 +21,10 @@ #define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ __field(bool, p2p) \ - __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") + __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) + __assign_str(vif_name, sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" @@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool, "true" : "false") ); +TRACE_EVENT(drv_return_u32, + TP_PROTO(struct ieee80211_local *local, u32 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, ret) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret) +); + TRACE_EVENT(drv_return_u64, TP_PROTO(struct ieee80211_local *local, u64 ret), TP_ARGS(local, ret), @@ -1375,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx, ) ); +#if !defined(__TRACE_VIF_ENTRY) +#define __TRACE_VIF_ENTRY +struct trace_vif_entry { + enum nl80211_iftype vif_type; + bool p2p; + char vif_name[IFNAMSIZ]; +} __packed; + +struct trace_chandef_entry { + u32 control_freq; + u32 chan_width; + u32 center_freq1; + u32 center_freq2; +} __packed; + +struct trace_switch_entry { + struct trace_vif_entry vif; + struct trace_chandef_entry old_chandef; + struct trace_chandef_entry new_chandef; +} __packed; + +#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from +#endif + +TRACE_EVENT(drv_switch_vif_chanctx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode), + TP_ARGS(local, vifs, n_vifs, mode), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, n_vifs) + __field(u32, mode) + __dynamic_array(u8, vifs, + sizeof(struct trace_switch_entry) * n_vifs) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->n_vifs = n_vifs; + __entry->mode = mode; + { + struct trace_switch_entry *local_vifs = + __get_dynamic_array(vifs); + int i; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(vifs[i].vif, + struct ieee80211_sub_if_data, + vif); + + SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); + SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); + strncpy(local_vifs[i].vif.vif_name, + sdata->name, + sizeof(local_vifs[i].vif.vif_name)); + SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, + old_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, + old_ctx->def.width); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, + old_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, + old_ctx->def.center_freq2); + SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, + new_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, + new_ctx->def.width); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, + new_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, + new_ctx->def.center_freq2); + } + } + ), + + TP_printk( + LOCAL_PR_FMT " n_vifs:%d mode:%d", + LOCAL_PR_ARG, __entry->n_vifs, __entry->mode + ) +); + DECLARE_EVENT_CLASS(local_sdata_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1499,6 +1598,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, TP_ARGS(local, sdata) ); +TRACE_EVENT(drv_get_expected_throughput, + TP_PROTO(struct ieee80211_sta *sta), + + TP_ARGS(sta), + + TP_STRUCT__entry( + STA_ENTRY + ), + + TP_fast_assign( + STA_ASSIGN; + ), + + TP_printk( + STA_PR_FMT, STA_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5476a69b45c..5214686d9fd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -477,6 +477,20 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); + + /* sync with ieee80211_sta_ps_deliver_wakeup */ + spin_lock(&sta->ps_lock); + /* + * STA woke up the meantime and all the frames on ps_tx_buf have + * been queued to pending queue. No reordering can happen, go + * ahead and Tx the packet. + */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + spin_unlock(&sta->ps_lock); + return TX_CONTINUE; + } + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); ps_dbg(tx->sdata, @@ -491,6 +505,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); + spin_unlock(&sta->ps_lock); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, @@ -874,7 +889,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, } /* adjust first fragment's length */ - skb->len = hdrlen + per_fragm; + skb_trim(skb, hdrlen + per_fragm); return 0; } @@ -2313,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { u8 *pos, *tim; int aid0 = 0; @@ -2326,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * checking byte-for-byte */ have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); - - if (ps->dtim_count == 0) - ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; - else - ps->dtim_count--; + if (!is_template) { + if (ps->dtim_count == 0) + ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; + else + ps->dtim_count--; + } tim = pos = (u8 *) skb_put(skb, 6); *pos++ = WLAN_EID_TIM; @@ -2376,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { struct ieee80211_local *local = sdata->local; @@ -2388,24 +2406,24 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * of the tim bitmap in mac80211 and the driver. */ if (local->tim_in_locked_section) { - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); } else { spin_lock_bh(&local->tim_lock); - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); spin_unlock_bh(&local->tim_lock); } return 0; } -static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, - struct beacon_data *beacon) +static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct probe_resp *resp; - int counter_offset_beacon = sdata->csa_counter_offset_beacon; - int counter_offset_presp = sdata->csa_counter_offset_presp; u8 *beacon_data; size_t beacon_data_len; + int i; + u8 count = sdata->csa_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -2423,40 +2441,57 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, default: return; } - if (WARN_ON(counter_offset_beacon >= beacon_data_len)) - return; - /* Warn if the driver did not check for/react to csa - * completeness. A beacon with CSA counter set to 0 should - * never occur, because a counter of 1 means switch just - * before the next beacon. - */ - if (WARN_ON(beacon_data[counter_offset_beacon] == 1)) - return; + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) { + u16 counter_offset_beacon = + sdata->csa_counter_offset_beacon[i]; + u16 counter_offset_presp = sdata->csa_counter_offset_presp[i]; + + if (counter_offset_beacon) { + if (WARN_ON(counter_offset_beacon >= beacon_data_len)) + return; - beacon_data[counter_offset_beacon]--; + beacon_data[counter_offset_beacon] = count; + } - if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) { - rcu_read_lock(); - resp = rcu_dereference(sdata->u.ap.probe_resp); + if (sdata->vif.type == NL80211_IFTYPE_AP && + counter_offset_presp) { + rcu_read_lock(); + resp = rcu_dereference(sdata->u.ap.probe_resp); - /* if nl80211 accepted the offset, this should not happen. */ - if (WARN_ON(!resp)) { + /* If nl80211 accepted the offset, this should + * not happen. + */ + if (WARN_ON(!resp)) { + rcu_read_unlock(); + return; + } + resp->data[counter_offset_presp] = count; rcu_read_unlock(); - return; } - resp->data[counter_offset_presp]--; - rcu_read_unlock(); } } +u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + sdata->csa_current_counter--; + + /* the counter should never reach 0 */ + WARN_ON(!sdata->csa_current_counter); + + return sdata->csa_current_counter; +} +EXPORT_SYMBOL(ieee80211_csa_update_counter); + bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct beacon_data *beacon = NULL; u8 *beacon_data; size_t beacon_data_len; - int counter_beacon = sdata->csa_counter_offset_beacon; + int counter_beacon = sdata->csa_counter_offset_beacon[0]; int ret = false; if (!ieee80211_sdata_running(sdata)) @@ -2506,9 +2541,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_is_complete); -struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 *tim_offset, u16 *tim_length) +static struct sk_buff * +__ieee80211_beacon_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + bool is_template) { struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; @@ -2517,6 +2554,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, enum ieee80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; + int csa_off_base = 0; rcu_read_lock(); @@ -2526,18 +2564,20 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!ieee80211_sdata_running(sdata) || !chanctx_conf) goto out; - if (tim_offset) - *tim_offset = 0; - if (tim_length) - *tim_length = 0; + if (offs) + memset(offs, 0, sizeof(*offs)); if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_if_ap *ap = &sdata->u.ap; struct beacon_data *beacon = rcu_dereference(ap->beacon); if (beacon) { - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, beacon); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, beacon); + } /* * headroom, head length, @@ -2554,12 +2594,16 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, + is_template); - if (tim_offset) - *tim_offset = beacon->head_len; - if (tim_length) - *tim_length = skb->len - beacon->head_len; + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; + } if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), @@ -2574,9 +2618,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!presp) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, presp); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + ieee80211_set_csa(sdata, presp); + } skb = dev_alloc_skb(local->tx_headroom + presp->head_len + local->hw.extra_beacon_tailroom); @@ -2596,8 +2643,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!bcn) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, bcn); + if (sdata->vif.csa_active) { + if (!is_template) + /* TODO: For mesh csa_counter is in TU, so + * decrementing it by one isn't correct, but + * for now we leave it consistent with overall + * mac80211's behavior. + */ + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, bcn); + } if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt(sdata, bcn); @@ -2611,13 +2667,33 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, goto out; skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); - ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template); + + if (offs) { + offs->tim_offset = bcn->head_len; + offs->tim_length = skb->len - bcn->head_len; + } + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); goto out; } + /* CSA offsets */ + if (offs) { + int i; + + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) { + u16 csa_off = sdata->csa_counter_offset_beacon[i]; + + if (!csa_off) + continue; + + offs->csa_counter_offs[i] = csa_off_base + csa_off; + } + } + band = chanctx_conf->def.chan->band; info = IEEE80211_SKB_CB(skb); @@ -2648,6 +2724,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, out: rcu_read_unlock(); return skb; + +} + +struct sk_buff * +ieee80211_beacon_get_template(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs) +{ + return __ieee80211_beacon_get(hw, vif, offs, true); +} +EXPORT_SYMBOL(ieee80211_beacon_get_template); + +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) +{ + struct ieee80211_mutable_offsets offs = {}; + struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); + + if (tim_offset) + *tim_offset = offs.tim_offset; + + if (tim_length) + *tim_length = offs.tim_length; + + return bcn; } EXPORT_SYMBOL(ieee80211_beacon_get_tim); @@ -2885,7 +2987,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP) sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d842af5c8a9..6886601afe1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -435,9 +435,8 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data) +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; @@ -461,9 +460,6 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, __skb_queue_tail(&local->pending[queue], skb); } - if (fn) - fn(data); - for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); @@ -558,7 +554,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local, ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); - drv_flush(local, queues, false); + drv_flush(local, sdata, queues, false); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); @@ -1461,6 +1457,44 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_chanctx *ctx; + + /* + * We get here if during resume the device can't be restarted properly. + * We might also get here if this happens during HW reset, which is a + * slightly different situation and we need to drop all connections in + * the latter case. + * + * Ask cfg80211 to turn off all interfaces, this will result in more + * warnings but at least we'll then get into a clean stopped state. + */ + + local->resuming = false; + local->suspended = false; + local->started = false; + + /* scheduled scan clearly can't be running any more, but tell + * cfg80211 and clear local state + */ + ieee80211_sched_scan_end(local); + + list_for_each_entry(sdata, &local->interfaces, list) + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Mark channel contexts as not being in the driver any more to avoid + * removing them from the driver during the shutdown process... + */ + mutex_lock(&local->chanctx_mtx); + list_for_each_entry(ctx, &local->chanctx_list, list) + ctx->driver_present = false; + mutex_unlock(&local->chanctx_mtx); + + cfg80211_shutdown_all_interfaces(local->hw.wiphy); +} + static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -1524,9 +1558,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ res = drv_start(local); if (res) { - WARN(local->suspended, "Hardware became unavailable " - "upon resume. This could be a software issue " - "prior to suspend or a hardware issue.\n"); + if (local->suspended) + WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n"); + else + WARN(1, "Hardware became unavailable during restart.\n"); + ieee80211_handle_reconfig_failure(local); return res; } @@ -1550,7 +1586,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(local->resuming); res = drv_add_interface(local, sdata); if (WARN_ON(res)) { - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); synchronize_net(); kfree(sdata); } @@ -1569,17 +1605,17 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(ctx, &local->chanctx_list, list) WARN_ON(drv_add_chanctx(local, ctx)); mutex_unlock(&local->chanctx_mtx); - } - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_assign_chanctx(local, sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_assign_chanctx(local, sdata); + } - sdata = rtnl_dereference(local->monitor_sdata); - if (sdata && ieee80211_sdata_running(sdata)) - ieee80211_assign_chanctx(local, sdata); + sdata = rtnl_dereference(local->monitor_sdata); + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata); + } /* add STAs back */ mutex_lock(&local->sta_mtx); @@ -1675,13 +1711,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } break; case NL80211_IFTYPE_WDS: - break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: - /* ignore virtual */ - break; case NL80211_IFTYPE_P2P_DEVICE: - changed = BSS_CHANGED_IDLE; + /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1767,6 +1800,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* + * Reconfigure sched scan if it was interrupted by FW restart or + * suspend. + */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && local->sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. + */ + if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + local->sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); + + /* * If this is for hw restart things are still running. * We may want to change that later, however. */ @@ -1794,26 +1847,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(1); #endif - /* - * Reconfigure sched scan if it was interrupted by FW restart or - * suspend. - */ - mutex_lock(&local->mtx); - sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); - if (sched_scan_sdata && local->sched_scan_req) - /* - * Sched scan stopped, but we don't want to report it. Instead, - * we're trying to reschedule. - */ - if (__ieee80211_request_sched_scan_start(sched_scan_sdata, - local->sched_scan_req)) - sched_scan_stopped = true; - mutex_unlock(&local->mtx); - - if (sched_scan_stopped) - cfg80211_sched_scan_stopped(local->hw.wiphy); - return 0; } @@ -2801,3 +2834,121 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, ps->dtim_count = dtim_count; } + +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *sdata_iter; + enum nl80211_iftype iftype = sdata->wdev.iftype; + int num[NUM_NL80211_IFTYPES]; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + int total = 1; + + lockdep_assert_held(&local->chanctx_mtx); + + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + if (WARN_ON(chandef && chanmode == IEEE80211_CHANCTX_SHARED && + !chandef->chan)) + return -EINVAL; + + if (chandef) + num_different_channels = 1; + + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) + return -EINVAL; + + /* Always allow software iftypes */ + if (local->hw.wiphy->software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; + return 0; + } + + memset(num, 0, sizeof(num)); + + if (iftype != NL80211_IFTYPE_UNSPECIFIED) + num[iftype] = 1; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->conf.radar_enabled) + radar_detect |= BIT(ctx->conf.def.width); + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { + num_different_channels++; + continue; + } + if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && + cfg80211_chandef_compatible(chandef, + &ctx->conf.def)) + continue; + num_different_channels++; + } + + list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { + struct wireless_dev *wdev_iter; + + wdev_iter = &sdata_iter->wdev; + + if (sdata_iter == sdata || + rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + + if (total == 1 && !radar_detect) + return 0; + + return cfg80211_check_combinations(local->hw.wiphy, + num_different_channels, + radar_detect, num); +} + +static void +ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, + void *data) +{ + u32 *max_num_different_channels = data; + + *max_num_different_channels = max(*max_num_different_channels, + c->num_different_channels); +} + +int ieee80211_max_num_channels(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + int num[NUM_NL80211_IFTYPES] = {}; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + u8 radar_detect = 0; + u32 max_num_different_channels = 1; + int err; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) { + num_different_channels++; + + if (ctx->conf.radar_enabled) + radar_detect |= BIT(ctx->conf.def.width); + } + + list_for_each_entry_rcu(sdata, &local->interfaces, list) + num[sdata->wdev.iftype]++; + + err = cfg80211_iter_combinations(local->hw.wiphy, + num_different_channels, radar_detect, + num, ieee80211_iter_max_chans, + &max_num_different_channels); + if (err < 0) + return err; + + return max_num_different_channels; +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e9e36a25616..9265adfdabf 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -129,9 +129,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; - /* A VHT STA must support 40 MHz */ - if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) - return; + /* + * A VHT STA must support 40 MHz, but if we verify that here + * then we break a few things - some APs (e.g. Netgear R6300v2 + * and others based on the BCM4360 chipset) will unset this + * capability bit when operating in 20 MHz. + */ vht_cap->vht_supported = true; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 21211c60ca9..d51422c778d 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -154,6 +154,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, return IEEE80211_AC_BE; } + if (skb->protocol == sdata->control_port_protocol) { + skb->priority = 7; + return ieee80211_downgrade_queue(sdata, skb); + } + /* use the data classifier to determine what 802.1d tag the * data frame has */ rcu_read_lock(); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b8600e3c29c..9b3dcc20114 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -406,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile index 57cf5d1a2e4..15d62df5218 100644 --- a/net/mac802154/Makefile +++ b/net/mac802154/Makefile @@ -1,2 +1,4 @@ obj-$(CONFIG_MAC802154) += mac802154.o mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o + +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 52ae6646a41..2cf66d885e6 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -27,6 +27,7 @@ #include <net/netlink.h> #include <linux/nl802154.h> #include <net/mac802154.h> +#include <net/ieee802154_netdev.h> #include <net/route.h> #include <net/wpan-phy.h> @@ -35,9 +36,28 @@ int mac802154_slave_open(struct net_device *dev) { struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct mac802154_sub_if_data *subif; struct mac802154_priv *ipriv = priv->hw; int res = 0; + ASSERT_RTNL(); + + if (priv->type == IEEE802154_DEV_WPAN) { + mutex_lock(&priv->hw->slaves_mtx); + list_for_each_entry(subif, &priv->hw->slaves, list) { + if (subif != priv && subif->type == priv->type && + subif->running) { + mutex_unlock(&priv->hw->slaves_mtx); + return -EBUSY; + } + } + mutex_unlock(&priv->hw->slaves_mtx); + } + + mutex_lock(&priv->hw->slaves_mtx); + priv->running = true; + mutex_unlock(&priv->hw->slaves_mtx); + if (ipriv->open_count++ == 0) { res = ipriv->ops->start(&ipriv->hw); WARN_ON(res); @@ -46,7 +66,9 @@ int mac802154_slave_open(struct net_device *dev) } if (ipriv->ops->ieee_addr) { - res = ipriv->ops->ieee_addr(&ipriv->hw, dev->dev_addr); + __le64 addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + res = ipriv->ops->ieee_addr(&ipriv->hw, addr); WARN_ON(res); if (res) goto err; @@ -66,8 +88,14 @@ int mac802154_slave_close(struct net_device *dev) struct mac802154_sub_if_data *priv = netdev_priv(dev); struct mac802154_priv *ipriv = priv->hw; + ASSERT_RTNL(); + netif_stop_queue(dev); + mutex_lock(&priv->hw->slaves_mtx); + priv->running = false; + mutex_unlock(&priv->hw->slaves_mtx); + if (!--ipriv->open_count) ipriv->ops->stop(&ipriv->hw); @@ -165,6 +193,49 @@ err: return ERR_PTR(err); } +static int mac802154_set_txpower(struct wpan_phy *phy, int db) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_txpower(&priv->hw, db); +} + +static int mac802154_set_lbt(struct wpan_phy *phy, bool on) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_lbt(&priv->hw, on); +} + +static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_cca_mode(&priv->hw, mode); +} + +static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_cca_ed_level(&priv->hw, level); +} + +static int mac802154_set_csma_params(struct wpan_phy *phy, u8 min_be, + u8 max_be, u8 retries) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_csma_params(&priv->hw, min_be, max_be, retries); +} + +static int mac802154_set_frame_retries(struct wpan_phy *phy, s8 retries) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + return priv->ops->set_frame_retries(&priv->hw, retries); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -242,6 +313,18 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; + if (priv->ops->set_txpower) + priv->phy->set_txpower = mac802154_set_txpower; + if (priv->ops->set_lbt) + priv->phy->set_lbt = mac802154_set_lbt; + if (priv->ops->set_cca_mode) + priv->phy->set_cca_mode = mac802154_set_cca_mode; + if (priv->ops->set_cca_ed_level) + priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; + if (priv->ops->set_csma_params) + priv->phy->set_csma_params = mac802154_set_csma_params; + if (priv->ops->set_frame_retries) + priv->phy->set_frame_retries = mac802154_set_frame_retries; rc = wpan_phy_register(priv->phy); if (rc < 0) diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index d48422e2711..28ef59c566e 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -23,6 +23,8 @@ #ifndef MAC802154_H #define MAC802154_H +#include <net/ieee802154_netdev.h> + /* mac802154 device private data */ struct mac802154_priv { struct ieee802154_dev hw; @@ -71,15 +73,19 @@ struct mac802154_sub_if_data { struct net_device *dev; int type; + bool running; spinlock_t mib_lock; __le16 pan_id; __le16 short_addr; + __le64 extended_addr; u8 chan; u8 page; + struct ieee802154_mac_params mac_params; + /* MAC BSN field */ u8 bsn; /* MAC DSN field */ @@ -106,12 +112,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan); /* MIB callbacks */ -void mac802154_dev_set_short_addr(struct net_device *dev, u16 val); -u16 mac802154_dev_get_short_addr(const struct net_device *dev); +void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val); +__le16 mac802154_dev_get_short_addr(const struct net_device *dev); void mac802154_dev_set_ieee_addr(struct net_device *dev); -u16 mac802154_dev_get_pan_id(const struct net_device *dev); -void mac802154_dev_set_pan_id(struct net_device *dev, u16 val); +__le16 mac802154_dev_get_pan_id(const struct net_device *dev); +void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); u8 mac802154_dev_get_dsn(const struct net_device *dev); +int mac802154_set_mac_params(struct net_device *dev, + const struct ieee802154_mac_params *params); +void mac802154_get_mac_params(struct net_device *dev, + struct ieee802154_mac_params *params); + #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index a99910d4d52..d40c0928bc6 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -40,7 +40,7 @@ static int mac802154_mlme_start_req(struct net_device *dev, u8 pan_coord, u8 blx, u8 coord_realign) { - BUG_ON(addr->addr_type != IEEE802154_ADDR_SHORT); + BUG_ON(addr->mode != IEEE802154_ADDR_SHORT); mac802154_dev_set_pan_id(dev, addr->pan_id); mac802154_dev_set_short_addr(dev, addr->short_addr); @@ -74,4 +74,7 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, .get_dsn = mac802154_dev_get_dsn, + + .set_mac_params = mac802154_set_mac_params, + .get_mac_params = mac802154_get_mac_params, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 8ded97cf1c3..f0991f2344d 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -24,6 +24,7 @@ #include <linux/if_arp.h> #include <net/mac802154.h> +#include <net/ieee802154_netdev.h> #include <net/wpan-phy.h> #include "mac802154.h" @@ -62,8 +63,6 @@ static void hw_addr_notify(struct work_struct *work) pr_debug("failed changed mask %lx\n", nw->changed); kfree(nw); - - return; } static void set_hw_addr_filt(struct net_device *dev, unsigned long changed) @@ -79,11 +78,9 @@ static void set_hw_addr_filt(struct net_device *dev, unsigned long changed) work->dev = dev; work->changed = changed; queue_work(priv->hw->dev_workqueue, &work->work); - - return; } -void mac802154_dev_set_short_addr(struct net_device *dev, u16 val) +void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val) { struct mac802154_sub_if_data *priv = netdev_priv(dev); @@ -100,10 +97,10 @@ void mac802154_dev_set_short_addr(struct net_device *dev, u16 val) } } -u16 mac802154_dev_get_short_addr(const struct net_device *dev) +__le16 mac802154_dev_get_short_addr(const struct net_device *dev) { struct mac802154_sub_if_data *priv = netdev_priv(dev); - u16 ret; + __le16 ret; BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -119,19 +116,19 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev) struct mac802154_sub_if_data *priv = netdev_priv(dev); struct mac802154_priv *mac = priv->hw; + priv->extended_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + if (mac->ops->set_hw_addr_filt && - memcmp(mac->hw.hw_filt.ieee_addr, - dev->dev_addr, IEEE802154_ADDR_LEN)) { - memcpy(mac->hw.hw_filt.ieee_addr, - dev->dev_addr, IEEE802154_ADDR_LEN); + mac->hw.hw_filt.ieee_addr != priv->extended_addr) { + mac->hw.hw_filt.ieee_addr = priv->extended_addr; set_hw_addr_filt(dev, IEEE802515_AFILT_IEEEADDR_CHANGED); } } -u16 mac802154_dev_get_pan_id(const struct net_device *dev) +__le16 mac802154_dev_get_pan_id(const struct net_device *dev) { struct mac802154_sub_if_data *priv = netdev_priv(dev); - u16 ret; + __le16 ret; BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -142,7 +139,7 @@ u16 mac802154_dev_get_pan_id(const struct net_device *dev) return ret; } -void mac802154_dev_set_pan_id(struct net_device *dev, u16 val) +void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val) { struct mac802154_sub_if_data *priv = netdev_priv(dev); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 38548ec2098..03855b0677c 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -80,7 +80,6 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) mac802154_wpans_rx(priv, skb); out: dev_kfree_skb(skb); - return; } static void mac802154_rx_worker(struct work_struct *work) diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 372d8a222b9..1df7a6a5738 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -35,35 +35,6 @@ #include "mac802154.h" -static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) -{ - if (unlikely(!pskb_may_pull(skb, 1))) - return -EINVAL; - - *val = skb->data[0]; - skb_pull(skb, 1); - - return 0; -} - -static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val) -{ - if (unlikely(!pskb_may_pull(skb, 2))) - return -EINVAL; - - *val = skb->data[0] | (skb->data[1] << 8); - skb_pull(skb, 2); - - return 0; -} - -static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src) -{ - int i; - for (i = 0; i < IEEE802154_ADDR_LEN; i++) - dest[IEEE802154_ADDR_LEN - i - 1] = src[i]; -} - static int mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -76,19 +47,25 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGIFADDR: - if (priv->pan_id == IEEE802154_PANID_BROADCAST || - priv->short_addr == IEEE802154_ADDR_BROADCAST) { + { + u16 pan_id, short_addr; + + pan_id = le16_to_cpu(priv->pan_id); + short_addr = le16_to_cpu(priv->short_addr); + if (pan_id == IEEE802154_PANID_BROADCAST || + short_addr == IEEE802154_ADDR_BROADCAST) { err = -EADDRNOTAVAIL; break; } sa->family = AF_IEEE802154; sa->addr.addr_type = IEEE802154_ADDR_SHORT; - sa->addr.pan_id = priv->pan_id; - sa->addr.short_addr = priv->short_addr; + sa->addr.pan_id = pan_id; + sa->addr.short_addr = short_addr; err = 0; break; + } case SIOCSIFADDR: dev_warn(&dev->dev, "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n"); @@ -101,8 +78,8 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - priv->pan_id = sa->addr.pan_id; - priv->short_addr = sa->addr.short_addr; + priv->pan_id = cpu_to_le16(sa->addr.pan_id); + priv->short_addr = cpu_to_le16(sa->addr.short_addr); err = 0; break; @@ -125,190 +102,154 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) return 0; } -static int mac802154_header_create(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, - const void *_daddr, - const void *_saddr, - unsigned len) +int mac802154_set_mac_params(struct net_device *dev, + const struct ieee802154_mac_params *params) { - const struct ieee802154_addr *saddr = _saddr; - const struct ieee802154_addr *daddr = _daddr; - struct ieee802154_addr dev_addr; struct mac802154_sub_if_data *priv = netdev_priv(dev); - int pos = 2; - u8 head[MAC802154_FRAME_HARD_HEADER_LEN]; - u16 fc; - if (!daddr) - return -EINVAL; + mutex_lock(&priv->hw->slaves_mtx); + priv->mac_params = *params; + mutex_unlock(&priv->hw->slaves_mtx); - head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ - fc = mac_cb_type(skb); - if (mac_cb_is_ackreq(skb)) - fc |= IEEE802154_FC_ACK_REQ; + return 0; +} - if (!saddr) { - spin_lock_bh(&priv->mib_lock); +void mac802154_get_mac_params(struct net_device *dev, + struct ieee802154_mac_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); - if (priv->short_addr == IEEE802154_ADDR_BROADCAST || - priv->short_addr == IEEE802154_ADDR_UNDEF || - priv->pan_id == IEEE802154_PANID_BROADCAST) { - dev_addr.addr_type = IEEE802154_ADDR_LONG; - memcpy(dev_addr.hwaddr, dev->dev_addr, - IEEE802154_ADDR_LEN); - } else { - dev_addr.addr_type = IEEE802154_ADDR_SHORT; - dev_addr.short_addr = priv->short_addr; - } + mutex_lock(&priv->hw->slaves_mtx); + *params = priv->mac_params; + mutex_unlock(&priv->hw->slaves_mtx); +} - dev_addr.pan_id = priv->pan_id; - saddr = &dev_addr; +int mac802154_wpan_open(struct net_device *dev) +{ + int rc; + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct wpan_phy *phy = priv->hw->phy; - spin_unlock_bh(&priv->mib_lock); - } + rc = mac802154_slave_open(dev); + if (rc < 0) + return rc; - if (daddr->addr_type != IEEE802154_ADDR_NONE) { - fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT); + mutex_lock(&phy->pib_lock); - head[pos++] = daddr->pan_id & 0xff; - head[pos++] = daddr->pan_id >> 8; + if (phy->set_txpower) { + rc = phy->set_txpower(phy, priv->mac_params.transmit_power); + if (rc < 0) + goto out; + } - if (daddr->addr_type == IEEE802154_ADDR_SHORT) { - head[pos++] = daddr->short_addr & 0xff; - head[pos++] = daddr->short_addr >> 8; - } else { - mac802154_haddr_copy_swap(head + pos, daddr->hwaddr); - pos += IEEE802154_ADDR_LEN; - } + if (phy->set_lbt) { + rc = phy->set_lbt(phy, priv->mac_params.lbt); + if (rc < 0) + goto out; } - if (saddr->addr_type != IEEE802154_ADDR_NONE) { - fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT); + if (phy->set_cca_mode) { + rc = phy->set_cca_mode(phy, priv->mac_params.cca_mode); + if (rc < 0) + goto out; + } - if ((saddr->pan_id == daddr->pan_id) && - (saddr->pan_id != IEEE802154_PANID_BROADCAST)) { - /* PANID compression/intra PAN */ - fc |= IEEE802154_FC_INTRA_PAN; - } else { - head[pos++] = saddr->pan_id & 0xff; - head[pos++] = saddr->pan_id >> 8; - } + if (phy->set_cca_ed_level) { + rc = phy->set_cca_ed_level(phy, priv->mac_params.cca_ed_level); + if (rc < 0) + goto out; + } - if (saddr->addr_type == IEEE802154_ADDR_SHORT) { - head[pos++] = saddr->short_addr & 0xff; - head[pos++] = saddr->short_addr >> 8; - } else { - mac802154_haddr_copy_swap(head + pos, saddr->hwaddr); - pos += IEEE802154_ADDR_LEN; - } + if (phy->set_csma_params) { + rc = phy->set_csma_params(phy, priv->mac_params.min_be, + priv->mac_params.max_be, + priv->mac_params.csma_retries); + if (rc < 0) + goto out; } - head[0] = fc; - head[1] = fc >> 8; + if (phy->set_frame_retries) { + rc = phy->set_frame_retries(phy, + priv->mac_params.frame_retries); + if (rc < 0) + goto out; + } - memcpy(skb_push(skb, pos), head, pos); - skb_reset_mac_header(skb); - skb->mac_len = pos; + mutex_unlock(&phy->pib_lock); + return 0; - return pos; +out: + mutex_unlock(&phy->pib_lock); + return rc; } -static int -mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int mac802154_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + const void *daddr, + const void *saddr, + unsigned len) { - const u8 *hdr = skb_mac_header(skb); - const u8 *tail = skb_tail_pointer(skb); - struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr; - u16 fc; - int da_type; - - if (hdr + 3 > tail) - goto malformed; + struct ieee802154_hdr hdr; + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int hlen; - fc = hdr[0] | (hdr[1] << 8); + if (!daddr) + return -EINVAL; - hdr += 3; + memset(&hdr.fc, 0, sizeof(hdr.fc)); + hdr.fc.type = mac_cb_type(skb); + hdr.fc.security_enabled = mac_cb_is_secen(skb); + hdr.fc.ack_request = mac_cb_is_ackreq(skb); - da_type = IEEE802154_FC_DAMODE(fc); - addr->addr_type = IEEE802154_FC_SAMODE(fc); + if (!saddr) { + spin_lock_bh(&priv->mib_lock); - switch (da_type) { - case IEEE802154_ADDR_NONE: - if (fc & IEEE802154_FC_INTRA_PAN) - goto malformed; - break; - case IEEE802154_ADDR_LONG: - if (fc & IEEE802154_FC_INTRA_PAN) { - if (hdr + 2 > tail) - goto malformed; - addr->pan_id = hdr[0] | (hdr[1] << 8); - hdr += 2; + if (priv->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) || + priv->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + priv->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) { + hdr.source.mode = IEEE802154_ADDR_LONG; + hdr.source.extended_addr = priv->extended_addr; + } else { + hdr.source.mode = IEEE802154_ADDR_SHORT; + hdr.source.short_addr = priv->short_addr; } - if (hdr + IEEE802154_ADDR_LEN > tail) - goto malformed; + hdr.source.pan_id = priv->pan_id; - hdr += IEEE802154_ADDR_LEN; - break; - case IEEE802154_ADDR_SHORT: - if (fc & IEEE802154_FC_INTRA_PAN) { - if (hdr + 2 > tail) - goto malformed; - addr->pan_id = hdr[0] | (hdr[1] << 8); - hdr += 2; - } - - if (hdr + 2 > tail) - goto malformed; + spin_unlock_bh(&priv->mib_lock); + } else { + hdr.source = *(const struct ieee802154_addr *)saddr; + } - hdr += 2; - break; - default: - goto malformed; + hdr.dest = *(const struct ieee802154_addr *)daddr; - } + hlen = ieee802154_hdr_push(skb, &hdr); + if (hlen < 0) + return -EINVAL; - switch (addr->addr_type) { - case IEEE802154_ADDR_NONE: - break; - case IEEE802154_ADDR_LONG: - if (!(fc & IEEE802154_FC_INTRA_PAN)) { - if (hdr + 2 > tail) - goto malformed; - addr->pan_id = hdr[0] | (hdr[1] << 8); - hdr += 2; - } + skb_reset_mac_header(skb); + skb->mac_len = hlen; - if (hdr + IEEE802154_ADDR_LEN > tail) - goto malformed; + if (hlen + len + 2 > dev->mtu) + return -EMSGSIZE; - mac802154_haddr_copy_swap(addr->hwaddr, hdr); - hdr += IEEE802154_ADDR_LEN; - break; - case IEEE802154_ADDR_SHORT: - if (!(fc & IEEE802154_FC_INTRA_PAN)) { - if (hdr + 2 > tail) - goto malformed; - addr->pan_id = hdr[0] | (hdr[1] << 8); - hdr += 2; - } + return hlen; +} - if (hdr + 2 > tail) - goto malformed; +static int +mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + struct ieee802154_hdr hdr; + struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr; - addr->short_addr = hdr[0] | (hdr[1] << 8); - hdr += 2; - break; - default: - goto malformed; + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) { + pr_debug("malformed packet\n"); + return 0; } - return sizeof(struct ieee802154_addr); - -malformed: - pr_debug("malformed packet\n"); - return 0; + *addr = hdr.source; + return sizeof(*addr); } static netdev_tx_t @@ -344,7 +285,7 @@ static struct header_ops mac802154_header_ops = { }; static const struct net_device_ops mac802154_wpan_ops = { - .ndo_open = mac802154_slave_open, + .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = mac802154_wpan_xmit, .ndo_do_ioctl = mac802154_wpan_ioctl, @@ -382,8 +323,14 @@ void mac802154_wpan_setup(struct net_device *dev) get_random_bytes(&priv->bsn, 1); get_random_bytes(&priv->dsn, 1); - priv->pan_id = IEEE802154_PANID_BROADCAST; - priv->short_addr = IEEE802154_ADDR_BROADCAST; + /* defaults per 802.15.4-2011 */ + priv->mac_params.min_be = 3; + priv->mac_params.max_be = 5; + priv->mac_params.csma_retries = 4; + priv->mac_params.frame_retries = -1; /* for compatibility, actual default is 3 */ + + priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); + priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); } static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) @@ -394,13 +341,18 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) static int mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) { + __le16 span, sshort; + pr_debug("getting packet via slave interface %s\n", sdata->dev->name); spin_lock_bh(&sdata->mib_lock); - switch (mac_cb(skb)->da.addr_type) { + span = sdata->pan_id; + sshort = sdata->short_addr; + + switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: - if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) + if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE) /* FIXME: check if we are PAN coordinator */ skb->pkt_type = PACKET_OTHERHOST; else @@ -408,23 +360,22 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; break; case IEEE802154_ADDR_LONG: - if (mac_cb(skb)->da.pan_id != sdata->pan_id && - mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + if (mac_cb(skb)->dest.pan_id != span && + mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST)) skb->pkt_type = PACKET_OTHERHOST; - else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr, - IEEE802154_ADDR_LEN)) + else if (mac_cb(skb)->dest.extended_addr == sdata->extended_addr) skb->pkt_type = PACKET_HOST; else skb->pkt_type = PACKET_OTHERHOST; break; case IEEE802154_ADDR_SHORT: - if (mac_cb(skb)->da.pan_id != sdata->pan_id && - mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + if (mac_cb(skb)->dest.pan_id != span && + mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST)) skb->pkt_type = PACKET_OTHERHOST; - else if (mac_cb(skb)->da.short_addr == sdata->short_addr) + else if (mac_cb(skb)->dest.short_addr == sshort) skb->pkt_type = PACKET_HOST; - else if (mac_cb(skb)->da.short_addr == - IEEE802154_ADDR_BROADCAST) + else if (mac_cb(skb)->dest.short_addr == + cpu_to_le16(IEEE802154_ADDR_BROADCAST)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_OTHERHOST; @@ -451,88 +402,82 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) } } -static int mac802154_parse_frame_start(struct sk_buff *skb) +static void mac802154_print_addr(const char *name, + const struct ieee802154_addr *addr) { - u8 *head = skb->data; - u16 fc; - - if (mac802154_fetch_skb_u16(skb, &fc) || - mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq))) - goto err; - - pr_debug("fc: %04x dsn: %02x\n", fc, head[2]); - - mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc); - mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc); - mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc); + if (addr->mode == IEEE802154_ADDR_NONE) + pr_debug("%s not present\n", name); - if (fc & IEEE802154_FC_INTRA_PAN) - mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN; + pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id)); + if (addr->mode == IEEE802154_ADDR_SHORT) { + pr_debug("%s is short: %04x\n", name, + le16_to_cpu(addr->short_addr)); + } else { + u64 hw = swab64((__force u64) addr->extended_addr); - if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) { - if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id))) - goto err; - - /* source PAN id compression */ - if (mac_cb_is_intrapan(skb)) - mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id; + pr_debug("%s is hardware: %8phC\n", name, &hw); + } +} - pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id); +static int mac802154_parse_frame_start(struct sk_buff *skb) +{ + int hlen; + struct ieee802154_hdr hdr; - if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) { - u16 *da = &(mac_cb(skb)->da.short_addr); + hlen = ieee802154_hdr_pull(skb, &hdr); + if (hlen < 0) + return -EINVAL; - if (mac802154_fetch_skb_u16(skb, da)) - goto err; + skb->mac_len = hlen; - pr_debug("destination address is short: %04x\n", - mac_cb(skb)->da.short_addr); - } else { - if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) - goto err; + pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc), + hdr.seq); - mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr, - skb->data); - skb_pull(skb, IEEE802154_ADDR_LEN); + mac_cb(skb)->flags = hdr.fc.type; - pr_debug("destination address is hardware\n"); - } - } + if (hdr.fc.ack_request) + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + if (hdr.fc.security_enabled) + mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN; - if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) { - /* non PAN-compression, fetch source address id */ - if (!(mac_cb_is_intrapan(skb))) { - u16 *sa_pan = &(mac_cb(skb)->sa.pan_id); + mac802154_print_addr("destination", &hdr.dest); + mac802154_print_addr("source", &hdr.source); - if (mac802154_fetch_skb_u16(skb, sa_pan)) - goto err; - } + mac_cb(skb)->source = hdr.source; + mac_cb(skb)->dest = hdr.dest; - pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id); + if (hdr.fc.security_enabled) { + u64 key; - if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) { - u16 *sa = &(mac_cb(skb)->sa.short_addr); + pr_debug("seclevel %i\n", hdr.sec.level); - if (mac802154_fetch_skb_u16(skb, sa)) - goto err; + switch (hdr.sec.key_id_mode) { + case IEEE802154_SCF_KEY_IMPLICIT: + pr_debug("implicit key\n"); + break; - pr_debug("source address is short: %04x\n", - mac_cb(skb)->sa.short_addr); - } else { - if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) - goto err; + case IEEE802154_SCF_KEY_INDEX: + pr_debug("key %02x\n", hdr.sec.key_id); + break; - mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr, - skb->data); - skb_pull(skb, IEEE802154_ADDR_LEN); + case IEEE802154_SCF_KEY_SHORT_INDEX: + pr_debug("key %04x:%04x %02x\n", + le32_to_cpu(hdr.sec.short_src) >> 16, + le32_to_cpu(hdr.sec.short_src) & 0xffff, + hdr.sec.key_id); + break; - pr_debug("source address is hardware\n"); + case IEEE802154_SCF_KEY_HW_INDEX: + key = swab64((__force u64) hdr.sec.extended_src); + pr_debug("key source %8phC %02x\n", &key, + hdr.sec.key_id); + break; } + + return -EINVAL; } return 0; -err: - return -EINVAL; } void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c37467562fd..e9410d17619 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -513,7 +513,6 @@ config NFT_QUEUE config NFT_REJECT depends on NF_TABLES - depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" help @@ -521,6 +520,11 @@ config NFT_REJECT explicitly deny and notify via TCP reset/ICMP informational errors unallowed traffic. +config NFT_REJECT_INET + depends on NF_TABLES_INET + default NFT_REJECT + tristate + config NFT_COMPAT depends on NF_TABLES depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ee9c4de5f8e..bffdad774da 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o +obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 44cd4f58adf..2f7f5c32c6f 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -61,6 +61,15 @@ config IP_SET_HASH_IP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPMARK + tristate "hash:ip,mark set support" + depends on IP_SET + help + This option adds the hash:ip,mark set type support, by which one + can store IPv4/IPv6 address and mark pairs. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_IPPORT tristate "hash:ip,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 44b2d38476f..231f10196cb 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index de770ec39e5..117208321f1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ -#define nfnl_dereference(p) \ +#define ip_set_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(inst, id) \ - nfnl_dereference((inst)->ip_set_list)[id] +#define ip_set(inst, id) \ + ip_set_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) + set->flags |= IPSET_CREATE_FLAG_FORCEADD; for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; @@ -510,7 +512,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, if (opt->dim < set->type->dimension || !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) - return 0; + return -IPSET_ERR_TYPE_MISMATCH; write_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); @@ -533,7 +535,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, if (opt->dim < set->type->dimension || !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) - return 0; + return -IPSET_ERR_TYPE_MISMATCH; write_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); @@ -640,7 +642,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(inst, index); + set = ip_set(inst, index); if (set) __ip_set_get(set); else @@ -666,7 +668,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ - set = nfnl_set(inst, index); + set = ip_set(inst, index); if (set != NULL) __ip_set_put(set); } @@ -734,7 +736,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { - set = nfnl_set(inst, i); + set = ip_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -760,7 +762,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); + s = ip_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -883,7 +885,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(inst->ip_set_list); + tmp = ip_set_dereference(inst->ip_set_list); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ @@ -900,7 +902,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(inst, index) = set; + ip_set(inst, index) = set; return ret; @@ -925,10 +927,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { static void ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(inst, index); + struct ip_set *set = ip_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(inst, index) = NULL; + ip_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -962,7 +964,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); + s = ip_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; @@ -970,7 +972,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); + s = ip_set(inst, i); if (s != NULL) ip_set_destroy_set(inst, i); } @@ -1020,7 +1022,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); + s = ip_set(inst, i); if (s != NULL) ip_set_flush_set(s); } @@ -1074,7 +1076,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); + s = ip_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1134,8 +1136,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(inst, from_id) = to; - nfnl_set(inst, to_id) = from; + ip_set(inst, from_id) = to; + ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1157,7 +1159,7 @@ ip_set_dump_done(struct netlink_callback *cb) struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; if (cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", - nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + ip_set(inst, cb->args[IPSET_CB_INDEX])->name); __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } @@ -1254,7 +1256,7 @@ dump_last: dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; - set = nfnl_set(inst, index); + set = ip_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1332,7 +1334,7 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", nfnl_set(inst, index)->name); + pr_debug("release set %s\n", ip_set(inst, index)->name); __ip_set_put_byindex(inst, index); cb->args[IPSET_CB_ARG0] = 0; } @@ -1887,7 +1889,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; if (id != IPSET_INVALID_ID) - req_get->family = nfnl_set(inst, id)->family; + req_get->family = ip_set(inst, id)->family; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } @@ -1901,7 +1903,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(inst, req_get->set.index); + set = ip_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1945,7 +1947,6 @@ ip_set_net_init(struct net *net) return -ENOMEM; inst->is_deleted = 0; rcu_assign_pointer(inst->ip_set_list, list); - pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -1960,7 +1961,7 @@ ip_set_net_exit(struct net *net) inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ for (i = 0; i < inst->ip_set_max; i++) { - set = nfnl_set(inst, i); + set = ip_set(inst, i); if (set != NULL) ip_set_destroy_set(inst, i); } @@ -1996,6 +1997,7 @@ ip_set_init(void) nfnetlink_subsys_unregister(&ip_set_netlink_subsys); return ret; } + pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index be6932ad3a8..61c7fb05280 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -263,6 +263,9 @@ struct htype { u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ +#ifdef IP_SET_HASH_WITH_MARKMASK + u32 markmask; /* markmask value for mark mask to store */ +#endif struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -454,6 +457,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && #endif +#ifdef IP_SET_HASH_WITH_MARKMASK + x->markmask == y->markmask && +#endif a->extensions == b->extensions; } @@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 key, multi = 0; + if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) { + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t,key); + if (n->pos) { + /* Choosing the first entry in the array to replace */ + j = 0; + goto reuse_slot; + } + rcu_read_unlock_bh(); + } if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ mtype_expire(set, h, NLEN(set->family), set->dsize); @@ -908,6 +926,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) goto nla_put_failure; #endif +#ifdef IP_SET_HASH_WITH_MARKMASK + if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) + goto nla_put_failure; +#endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; @@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; +#ifdef IP_SET_HASH_WITH_MARKMASK + u32 markmask; +#endif u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; @@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; + +#ifdef IP_SET_HASH_WITH_MARKMASK + markmask = 0xffffffff; +#endif #ifdef IP_SET_HASH_WITH_NETMASK netmask = set->family == NFPROTO_IPV4 ? 32 : 128; pr_debug("Create set %s with family %s\n", @@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || +#ifdef IP_SET_HASH_WITH_MARKMASK + !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) || +#endif !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; @@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, return -IPSET_ERR_INVALID_NETMASK; } #endif +#ifdef IP_SET_HASH_WITH_MARKMASK + if (tb[IPSET_ATTR_MARKMASK]) { + markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK])); + + if ((markmask > 4294967295u) || markmask == 0) + return -IPSET_ERR_INVALID_MARKMASK; + } +#endif hsize = sizeof(*h); #ifdef IP_SET_HASH_WITH_NETS @@ -1071,6 +1111,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #ifdef IP_SET_HASH_WITH_NETMASK h->netmask = netmask; #endif +#ifdef IP_SET_HASH_WITH_MARKMASK + h->markmask = markmask; +#endif get_random_bytes(&h->initval, sizeof(h->initval)); set->timeout = IPSET_NO_TIMEOUT; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index e65fc2423d5..dd40607f878 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -25,7 +25,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support */ -#define IPSET_TYPE_REV_MAX 2 /* Comments support */ +/* 2 Comments support */ +#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c new file mode 100644 index 00000000000..4eff0a29725 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -0,0 +1,321 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,mark type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>"); +IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:ip,mark"); + +/* Type specific function prefix */ +#define HTYPE hash_ipmark +#define IP_SET_HASH_WITH_MARKMASK + +/* IPv4 variant */ + +/* Member elements */ +struct hash_ipmark4_elem { + __be32 ip; + __u32 mark; +}; + +/* Common functions */ + +static inline bool +hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1, + const struct hash_ipmark4_elem *ip2, + u32 *multi) +{ + return ip1->ip == ip2->ip && + ip1->mark == ip2->mark; +} + +static bool +hash_ipmark4_data_list(struct sk_buff *skb, + const struct hash_ipmark4_elem *data) +{ + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_ipmark4_data_next(struct hash_ipmark4_elem *next, + const struct hash_ipmark4_elem *d) +{ + next->ip = d->ip; +} + +#define MTYPE hash_ipmark4 +#define PF 4 +#define HOST_MASK 32 +#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem) +#include "ip_set_hash_gen.h" + +static int +hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_ipmark *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + e.mark = skb->mark; + e.mark &= h->markmask; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_ipmark *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + e.mark &= h->markmask; + + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip = ntohl(e.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!cidr || cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip_set_mask_from_to(ip, ip_to, cidr); + } + + if (retried) + ip = ntohl(h->next.ip); + for (; !before(ip_to, ip); ip++) { + e.ip = htonl(ip); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_ipmark6_elem { + union nf_inet_addr ip; + __u32 mark; +}; + +/* Common functions */ + +static inline bool +hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1, + const struct hash_ipmark6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ip1->mark == ip2->mark; +} + +static bool +hash_ipmark6_data_list(struct sk_buff *skb, + const struct hash_ipmark6_elem *data) +{ + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_ipmark6_data_next(struct hash_ipmark4_elem *next, + const struct hash_ipmark6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK +#undef HKEY_DATALEN + +#define MTYPE hash_ipmark6 +#define PF 6 +#define HOST_MASK 128 +#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + + +static int +hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_ipmark *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + e.mark = skb->mark; + e.mark &= h->markmask; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_ipmark *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + e.mark &= h->markmask; + + if (adt == IPSET_TEST) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + + return ret; +} + +static struct ip_set_type hash_ipmark_type __read_mostly = { + .name = "hash:ip,mark", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MARK, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_ipmark_create, + .create_policy = { + [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 }, + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_MARK] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipmark_init(void) +{ + return ip_set_type_register(&hash_ipmark_type); +} + +static void __exit +hash_ipmark_fini(void) +{ + ip_set_type_unregister(&hash_ipmark_type); +} + +module_init(hash_ipmark_init); +module_exit(hash_ipmark_fini); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 525a595dd1f..7597b82a8b0 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ -#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ +/* 3 Comments support added */ +#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index f5636631466..672655ffd57 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ -#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ +/* 3 Comments support added */ +#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5d87fe8a41f..7308d84f927 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -29,7 +29,8 @@ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ -#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ +/* 5 Comments support added */ +#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 8295cf4f9fd..4c7d495783a 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -26,7 +26,8 @@ /* 1 Range as input support for IPv4 added */ /* 2 nomatch flag support added */ /* 3 Counters support added */ -#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ +/* 4 Comments support added */ +#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index b827a0f1f35..db2606805b3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -27,7 +27,8 @@ /* 1 nomatch flag support added */ /* 2 /0 support added */ /* 3 Counters support added */ -#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ +/* 4 Comments support added */ +#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 6226803fc49..3e99987e4bf 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -24,7 +24,7 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); @@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void @@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb, (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; - return 0; + return false; nla_put_failure: - return 1; + return true; } static inline void diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7097fb0141b..1c645fbd09c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -28,7 +28,8 @@ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ -#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ +/* 5 Comments support added */ +#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 703d1192a6a..c0d2ba73f8b 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -25,7 +25,8 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ +/* 0 Comments support added */ +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 4f29fa97044..04d15fdc99e 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -7,8 +7,8 @@ #define E(a, b, c, d) \ {.ip6 = { \ - __constant_htonl(a), __constant_htonl(b), \ - __constant_htonl(c), __constant_htonl(d), \ + htonl(a), htonl(b), \ + htonl(c), htonl(d), \ } } /* diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 59a1a85bcb3..a8eb0a89326 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -871,11 +871,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->protocol = p->protocol; ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); - cp->vport = p->vport; - /* proto should only be IPPROTO_IP if d_addr is a fwmark */ + /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + &cp->vaddr, p->vaddr); + cp->vport = p->vport; + ip_vs_addr_set(p->af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 35be035ee0c..c42e83d2751 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2177,10 +2177,10 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) __u64 inbytes, outbytes; do { - start = u64_stats_fetch_begin_bh(&u->syncp); + start = u64_stats_fetch_begin_irq(&u->syncp); inbytes = u->ustats.inbytes; outbytes = u->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&u->syncp, start)); + } while (u64_stats_fetch_retry_irq(&u->syncp, start)); seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", i, u->ustats.conns, u->ustats.inpkts, @@ -3580,7 +3580,7 @@ out: } -static const struct genl_ops ip_vs_genl_ops[] __read_mostly = { +static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, .flags = GENL_ADMIN_PERM, diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index ca056a331e6..547ff33c1ef 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) spin_lock_bh(&svc->sched_lock); tbl->dead = 1; - for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { + for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblc_del(en); atomic_dec(&tbl->entries); @@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; - for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { + for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; spin_lock(&svc->sched_lock); @@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) if (goal > tbl->max_size/2) goal = tbl->max_size/2; - for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { + for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; spin_lock(&svc->sched_lock); @@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) tbl->rover = j; out: - mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); } @@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) /* * Initialize the hash buckets */ - for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { + for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) { INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; @@ -536,8 +536,7 @@ out: /* * IPVS LBLC Scheduler structure */ -static struct ip_vs_scheduler ip_vs_lblc_scheduler = -{ +static struct ip_vs_scheduler ip_vs_lblc_scheduler = { .name = "lblc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8824ed0ccc9..75421f2ba8b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -60,8 +60,59 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); -DEFINE_SPINLOCK(nf_conntrack_lock); -EXPORT_SYMBOL_GPL(nf_conntrack_lock); +__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; +EXPORT_SYMBOL_GPL(nf_conntrack_locks); + +__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); +EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); + +static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) +{ + h1 %= CONNTRACK_LOCKS; + h2 %= CONNTRACK_LOCKS; + spin_unlock(&nf_conntrack_locks[h1]); + if (h1 != h2) + spin_unlock(&nf_conntrack_locks[h2]); +} + +/* return true if we need to recompute hashes (in case hash table was resized) */ +static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, + unsigned int h2, unsigned int sequence) +{ + h1 %= CONNTRACK_LOCKS; + h2 %= CONNTRACK_LOCKS; + if (h1 <= h2) { + spin_lock(&nf_conntrack_locks[h1]); + if (h1 != h2) + spin_lock_nested(&nf_conntrack_locks[h2], + SINGLE_DEPTH_NESTING); + } else { + spin_lock(&nf_conntrack_locks[h2]); + spin_lock_nested(&nf_conntrack_locks[h1], + SINGLE_DEPTH_NESTING); + } + if (read_seqcount_retry(&net->ct.generation, sequence)) { + nf_conntrack_double_unlock(h1, h2); + return true; + } + return false; +} + +static void nf_conntrack_all_lock(void) +{ + int i; + + for (i = 0; i < CONNTRACK_LOCKS; i++) + spin_lock_nested(&nf_conntrack_locks[i], i); +} + +static void nf_conntrack_all_unlock(void) +{ + int i; + + for (i = 0; i < CONNTRACK_LOCKS; i++) + spin_unlock(&nf_conntrack_locks[i]); +} unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); @@ -192,6 +243,50 @@ clean_from_lists(struct nf_conn *ct) nf_ct_remove_expectations(ct); } +/* must be called with local_bh_disable */ +static void nf_ct_add_to_dying_list(struct nf_conn *ct) +{ + struct ct_pcpu *pcpu; + + /* add this conntrack to the (per cpu) dying list */ + ct->cpu = smp_processor_id(); + pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); + + spin_lock(&pcpu->lock); + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &pcpu->dying); + spin_unlock(&pcpu->lock); +} + +/* must be called with local_bh_disable */ +static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) +{ + struct ct_pcpu *pcpu; + + /* add this conntrack to the (per cpu) unconfirmed list */ + ct->cpu = smp_processor_id(); + pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); + + spin_lock(&pcpu->lock); + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &pcpu->unconfirmed); + spin_unlock(&pcpu->lock); +} + +/* must be called with local_bh_disable */ +static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) +{ + struct ct_pcpu *pcpu; + + /* We overload first tuple to link into unconfirmed or dying list.*/ + pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); + + spin_lock(&pcpu->lock); + BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + spin_unlock(&pcpu->lock); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -203,9 +298,6 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); - /* To make sure we don't get any weird locking issues here: - * destroy_conntrack() MUST NOT be called with a write lock - * to nf_conntrack_lock!!! -HW */ rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -213,19 +305,18 @@ destroy_conntrack(struct nf_conntrack *nfct) rcu_read_unlock(); - spin_lock_bh(&nf_conntrack_lock); + local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, - * too. */ + * too. + */ nf_ct_remove_expectations(ct); - /* We overload first tuple to link into unconfirmed or dying list.*/ - BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); - hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + nf_ct_del_from_dying_or_unconfirmed_list(ct); NF_CT_STAT_INC(net, delete); - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); if (ct->master) nf_ct_put(ct->master); @@ -237,17 +328,28 @@ destroy_conntrack(struct nf_conntrack *nfct) static void nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + unsigned int hash, reply_hash; + u16 zone = nf_ct_zone(ct); + unsigned int sequence; nf_ct_helper_destroy(ct); - spin_lock_bh(&nf_conntrack_lock); - /* Inside lock so preempt is disabled on module removal path. - * Otherwise we can get spurious warnings. */ - NF_CT_STAT_INC(net, delete_list); + + local_bh_disable(); + do { + sequence = read_seqcount_begin(&net->ct.generation); + hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + reply_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); + clean_from_lists(ct); - /* add this conntrack to the dying list */ - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &net->ct.dying); - spin_unlock_bh(&nf_conntrack_lock); + nf_conntrack_double_unlock(hash, reply_hash); + + nf_ct_add_to_dying_list(ct); + + NF_CT_STAT_INC(net, delete_list); + local_bh_enable(); } static void death_by_event(unsigned long ul_conntrack) @@ -312,12 +414,25 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); } +static inline bool +nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, + const struct nf_conntrack_tuple *tuple, + u16 zone) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + /* A conntrack can be recreated with the equal tuple, + * so we need to check that the conntrack is confirmed + */ + return nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone && + nf_ct_is_confirmed(ct); +} + /* * Warning : * - Caller must take a reference on returned object * and recheck nf_ct_tuple_equal(tuple, &h->tuple) - * OR - * - Caller must lock nf_conntrack_lock before calling this function */ static struct nf_conntrack_tuple_hash * ____nf_conntrack_find(struct net *net, u16 zone, @@ -333,8 +448,7 @@ ____nf_conntrack_find(struct net *net, u16 zone, local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { + if (nf_ct_key_equal(h, tuple, zone)) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -372,8 +486,7 @@ begin: !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || - nf_ct_zone(ct) != zone)) { + if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { nf_ct_put(ct); goto begin; } @@ -395,32 +508,36 @@ EXPORT_SYMBOL_GPL(nf_conntrack_find_get); static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, - unsigned int repl_hash) + unsigned int reply_hash) { struct net *net = nf_ct_net(ct); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &net->ct.hash[hash]); hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, - &net->ct.hash[repl_hash]); + &net->ct.hash[reply_hash]); } int nf_conntrack_hash_check_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - unsigned int hash, repl_hash; + unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; u16 zone; + unsigned int sequence; zone = nf_ct_zone(ct); - hash = hash_conntrack(net, zone, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, zone, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - spin_lock_bh(&nf_conntrack_lock); + local_bh_disable(); + do { + sequence = read_seqcount_begin(&net->ct.generation); + hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + reply_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* See if there's one in the list already, including reverse */ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) @@ -428,32 +545,57 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) &h->tuple) && zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) + hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; add_timer(&ct->timeout); - nf_conntrack_get(&ct->ct_general); - __nf_conntrack_hash_insert(ct, hash, repl_hash); + smp_wmb(); + /* The caller holds a reference to this object */ + atomic_set(&ct->ct_general.use, 2); + __nf_conntrack_hash_insert(ct, hash, reply_hash); + nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); - spin_unlock_bh(&nf_conntrack_lock); - + local_bh_enable(); return 0; out: + nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); return -EEXIST; } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); +/* deletion from this larval template list happens via nf_ct_put() */ +void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) +{ + struct ct_pcpu *pcpu; + + __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + nf_conntrack_get(&tmpl->ct_general); + + /* add this conntrack to the (per cpu) tmpl list */ + local_bh_disable(); + tmpl->cpu = smp_processor_id(); + pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); + + spin_lock(&pcpu->lock); + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &pcpu->tmpl); + spin_unlock_bh(&pcpu->lock); +} +EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) { - unsigned int hash, repl_hash; + unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; @@ -462,6 +604,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct net *net; u16 zone; + unsigned int sequence; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -474,31 +617,37 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; zone = nf_ct_zone(ct); - /* reuse the hash saved before */ - hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; - hash = hash_bucket(hash, net); - repl_hash = hash_conntrack(net, zone, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + local_bh_disable(); + + do { + sequence = read_seqcount_begin(&net->ct.generation); + /* reuse the hash saved before */ + hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; + hash = hash_bucket(hash, net); + reply_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* We're not in hash table, and we refuse to set up related - connections for unconfirmed conns. But packet copies and - REJECT will give spurious warnings here. */ + * connections for unconfirmed conns. But packet copies and + * REJECT will give spurious warnings here. + */ /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ /* No external references means no one else could have - confirmed us. */ + * confirmed us. + */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - - spin_lock_bh(&nf_conntrack_lock); - /* We have to check the DYING flag inside the lock to prevent a race against nf_ct_get_next_corpse() possibly called from user context, else we insert an already 'dead' hash, blocking further use of that particular connection -JM */ if (unlikely(nf_ct_is_dying(ct))) { - spin_unlock_bh(&nf_conntrack_lock); + nf_conntrack_double_unlock(hash, reply_hash); + local_bh_enable(); return NF_ACCEPT; } @@ -510,14 +659,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) &h->tuple) && zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) + hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - /* Remove from unconfirmed list */ - hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + nf_ct_del_from_dying_or_unconfirmed_list(ct); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in @@ -540,9 +688,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) * guarantee that no other CPU can find the conntrack before the above * stores are visible. */ - __nf_conntrack_hash_insert(ct, hash, repl_hash); + __nf_conntrack_hash_insert(ct, hash, reply_hash); + nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); help = nfct_help(ct); if (help && help->helper) @@ -553,8 +702,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: + nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); return NF_DROP; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); @@ -597,39 +747,48 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(struct net *net, unsigned int hash) +static noinline int early_drop(struct net *net, unsigned int _hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; struct nf_conn *ct = NULL, *tmp; struct hlist_nulls_node *n; - unsigned int i, cnt = 0; + unsigned int i = 0, cnt = 0; int dropped = 0; + unsigned int hash, sequence; + spinlock_t *lockp; - rcu_read_lock(); - for (i = 0; i < net->ct.htable_size; i++) { + local_bh_disable(); +restart: + sequence = read_seqcount_begin(&net->ct.generation); + hash = hash_bucket(_hash, net); + for (; i < net->ct.htable_size; i++) { + lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; + spin_lock(lockp); + if (read_seqcount_retry(&net->ct.generation, sequence)) { + spin_unlock(lockp); + goto restart; + } hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) + if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && + !nf_ct_is_dying(tmp) && + atomic_inc_not_zero(&tmp->ct_general.use)) { ct = tmp; + break; + } cnt++; } - if (ct != NULL) { - if (likely(!nf_ct_is_dying(ct) && - atomic_inc_not_zero(&ct->ct_general.use))) - break; - else - ct = NULL; - } + hash = (hash + 1) % net->ct.htable_size; + spin_unlock(lockp); - if (cnt >= NF_CT_EVICTION_RANGE) + if (ct || cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % net->ct.htable_size; } - rcu_read_unlock(); + local_bh_enable(); if (!ct) return dropped; @@ -678,7 +837,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - if (!early_drop(net, hash_bucket(hash, net))) { + if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); @@ -720,11 +879,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone, nf_ct_zone->id = zone; } #endif - /* - * changes to lookup keys must be done before setting refcnt to 1 + /* Because we use RCU lookups, we set ct_general.use to zero before + * this is inserted in any list. */ - smp_wmb(); - atomic_set(&ct->ct_general.use, 1); + atomic_set(&ct->ct_general.use, 0); return ct; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -748,6 +906,11 @@ void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + /* A freed object has refcnt == 0, that's + * the golden rule for SLAB_DESTROY_BY_RCU + */ + NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); + nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); @@ -771,7 +934,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; - struct nf_conntrack_expect *exp; + struct nf_conntrack_expect *exp = NULL; u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; struct nf_conn_timeout *timeout_ext; unsigned int *timeouts; @@ -815,39 +978,44 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, ecache ? ecache->expmask : 0, GFP_ATOMIC); - spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, zone, tuple); - if (exp) { - pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", - ct, exp); - /* Welcome, Mr. Bond. We've been expecting you... */ - __set_bit(IPS_EXPECTED_BIT, &ct->status); - ct->master = exp->master; - if (exp->helper) { - help = nf_ct_helper_ext_add(ct, exp->helper, - GFP_ATOMIC); - if (help) - rcu_assign_pointer(help->helper, exp->helper); - } + local_bh_disable(); + if (net->ct.expect_count) { + spin_lock(&nf_conntrack_expect_lock); + exp = nf_ct_find_expectation(net, zone, tuple); + if (exp) { + pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", + ct, exp); + /* Welcome, Mr. Bond. We've been expecting you... */ + __set_bit(IPS_EXPECTED_BIT, &ct->status); + /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ + ct->master = exp->master; + if (exp->helper) { + help = nf_ct_helper_ext_add(ct, exp->helper, + GFP_ATOMIC); + if (help) + rcu_assign_pointer(help->helper, exp->helper); + } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = exp->master->mark; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK - ct->secmark = exp->master->secmark; + ct->secmark = exp->master->secmark; #endif - nf_conntrack_get(&ct->master->ct_general); - NF_CT_STAT_INC(net, expect_new); - } else { + NF_CT_STAT_INC(net, expect_new); + } + spin_unlock(&nf_conntrack_expect_lock); + } + if (!exp) { __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } - /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &net->ct.unconfirmed); + /* Now it is inserted into the unconfirmed list, bump refcount */ + nf_conntrack_get(&ct->ct_general); + nf_ct_add_to_unconfirmed_list(ct); - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); if (exp) { if (exp->expectfn) @@ -1217,27 +1385,42 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct hlist_nulls_node *n; + int cpu; + spinlock_t *lockp; - spin_lock_bh(&nf_conntrack_lock); for (; *bucket < net->ct.htable_size; (*bucket)++) { - hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; + lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; + local_bh_disable(); + spin_lock(lockp); + if (*bucket < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + spin_unlock(lockp); + local_bh_enable(); + } + + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) - goto found; + set_bit(IPS_DYING_BIT, &ct->status); } + spin_unlock_bh(&pcpu->lock); } - hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - set_bit(IPS_DYING_BIT, &ct->status); - } - spin_unlock_bh(&nf_conntrack_lock); return NULL; found: atomic_inc(&ct->ct_general.use); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock(lockp); + local_bh_enable(); return ct; } @@ -1286,14 +1469,19 @@ static void nf_ct_release_dying_list(struct net *net) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct hlist_nulls_node *n; + int cpu; - spin_lock_bh(&nf_conntrack_lock); - hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - /* never fails to remove them, no listeners at this point */ - nf_ct_kill(ct); + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + /* never fails to remove them, no listeners at this point */ + nf_ct_kill(ct); + } + spin_unlock_bh(&pcpu->lock); } - spin_unlock_bh(&nf_conntrack_lock); } static int untrack_refs(void) @@ -1380,6 +1568,7 @@ i_see_dead_people: kmem_cache_destroy(net->ct.nf_conntrack_cachep); kfree(net->ct.slabname); free_percpu(net->ct.stat); + free_percpu(net->ct.pcpu_lists); } } @@ -1432,12 +1621,16 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; + local_bh_disable(); + nf_conntrack_all_lock(); + write_seqcount_begin(&init_net.ct.generation); + /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash - * though since that required taking the lock. + * though since that required taking the locks. */ - spin_lock_bh(&nf_conntrack_lock); + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, @@ -1454,7 +1647,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash = hash; - spin_unlock_bh(&nf_conntrack_lock); + + write_seqcount_end(&init_net.ct.generation); + nf_conntrack_all_unlock(); + local_bh_enable(); nf_ct_free_hashtable(old_hash, old_size); return 0; @@ -1476,7 +1672,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); int nf_conntrack_init_start(void) { int max_factor = 8; - int ret, cpu; + int i, ret, cpu; + + for (i = 0; i < CONNTRACK_LOCKS; i++) + spin_lock_init(&nf_conntrack_locks[i]); /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ @@ -1592,37 +1791,44 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { - int ret; + int ret = -ENOMEM; + int cpu; atomic_set(&net->ct.count, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); - net->ct.stat = alloc_percpu(struct ip_conntrack_stat); - if (!net->ct.stat) { - ret = -ENOMEM; + seqcount_init(&net->ct.generation); + + net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); + if (!net->ct.pcpu_lists) goto err_stat; + + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_init(&pcpu->lock); + INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } + net->ct.stat = alloc_percpu(struct ip_conntrack_stat); + if (!net->ct.stat) + goto err_pcpu_lists; + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); - if (!net->ct.slabname) { - ret = -ENOMEM; + if (!net->ct.slabname) goto err_slabname; - } net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, sizeof(struct nf_conn), 0, SLAB_DESTROY_BY_RCU, NULL); if (!net->ct.nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; goto err_cache; } net->ct.htable_size = nf_conntrack_htable_size; net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); if (!net->ct.hash) { - ret = -ENOMEM; printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_hash; } @@ -1664,6 +1870,8 @@ err_cache: kfree(net->ct.slabname); err_slabname: free_percpu(net->ct.stat); +err_pcpu_lists: + free_percpu(net->ct.pcpu_lists); err_stat: return ret; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4fd1ca94fd4..f87e8f68ad4 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) { struct nf_conntrack_expect *exp = (void *)ul_expect; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); nf_ct_unlink_expect(exp); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_put(exp); } @@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone, if (!nf_ct_is_confirmed(exp->master)) return NULL; + /* Avoid race with other CPUs, that for exp->master ct, is + * about to invoke ->destroy(), or nf_ct_delete() via timeout + * or early_drop(). + * + * The atomic_inc_not_zero() check tells: If that fails, we + * know that the ct is being destroyed. If it succeeds, we + * can be sure the ct cannot disappear underneath. + */ + if (unlikely(nf_ct_is_dying(exp->master) || + !atomic_inc_not_zero(&exp->master->ct_general.use))) + return NULL; + if (exp->flags & NF_CT_EXPECT_PERMANENT) { atomic_inc(&exp->use); return exp; @@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone, nf_ct_unlink_expect(exp); return exp; } + /* Undo exp->master refcnt increase, if del_timer() failed */ + nf_ct_put(exp->master); return NULL; } @@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct) if (!help) return; + spin_lock_bh(&nf_conntrack_expect_lock); hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } } + spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_remove_expectations); @@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, /* Generally a bad idea to call this: could have matched already. */ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); @@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); helper = rcu_dereference_protected(master_help->helper, - lockdep_is_held(&nf_conntrack_lock)); + lockdep_is_held(&nf_conntrack_expect_lock)); if (helper) { exp->timeout.expires = jiffies + helper->expect_policy[exp->class].timeout * HZ; @@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } /* Will be over limit? */ helper = rcu_dereference_protected(master_help->helper, - lockdep_is_held(&nf_conntrack_lock)); + lockdep_is_held(&nf_conntrack_expect_lock)); if (helper) { p = &helper->expect_policy[expect->class]; if (p->max_expected && @@ -417,12 +433,12 @@ out: return ret; } -int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, u32 portid, int report) { int ret; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); ret = __nf_ct_expect_check(expect); if (ret <= 0) goto out; @@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, ret = nf_ct_expect_insert(expect); if (ret < 0) goto out; - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; out: - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 70866d192ef..3a3a60b126e 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_refresh(ct, skb, info->timeout * HZ); /* Set expect timeout */ - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, info->sig_port[!dir]); if (exp) { @@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_dump_tuple(&exp->tuple); set_expect_timeout(exp, info->timeout); } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 974a2a4adef..5b3eae7d4c9 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -250,16 +250,14 @@ out: } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); +/* appropiate ct lock protecting must be taken by caller */ static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); - if (help && rcu_dereference_protected( - help->helper, - lockdep_is_held(&nf_conntrack_lock) - ) == me) { + if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } @@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list); void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) { - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); list_del_rcu(&n->head); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); @@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, const struct hlist_node *next; const struct hlist_nulls_node *nn; unsigned int i; + int cpu; /* Get rid of expectations */ + spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( help->helper, - lockdep_is_held(&nf_conntrack_lock) + lockdep_is_held(&nf_conntrack_expect_lock) ) == me || exp->helper == me) && del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } } } + spin_unlock_bh(&nf_conntrack_expect_lock); /* Get rid of expecteds, set helpers to NULL. */ - hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) - unhelp(h, me); - for (i = 0; i < net->ct.htable_size; i++) { - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) unhelp(h, me); + spin_unlock_bh(&pcpu->lock); + } + local_bh_disable(); + for (i = 0; i < net->ct.htable_size; i++) { + spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) + unhelp(h, me); + } + spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); } + local_bh_enable(); } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) @@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) synchronize_rcu(); rtnl_lock(); - spin_lock_bh(&nf_conntrack_lock); for_each_net(net) __nf_conntrack_helper_unregister(me, net); - spin_unlock_bh(&nf_conntrack_lock); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bb322d0beb4..ccc46fa5edb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; int res; + spinlock_t *lockp; + #ifdef CONFIG_NF_CONNTRACK_MARK const struct ctnetlink_dump_filter *filter = cb->data; #endif - spin_lock_bh(&nf_conntrack_lock); last = (struct nf_conn *)cb->args[1]; + + local_bh_disable(); for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { restart: + lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; + spin_lock(lockp); + if (cb->args[0] >= net->ct.htable_size) { + spin_unlock(lockp); + goto out; + } hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) @@ -803,16 +812,18 @@ restart: if (res < 0) { nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; + spin_unlock(lockp); goto out; } } + spin_unlock(lockp); if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: - spin_unlock_bh(&nf_conntrack_lock); + local_bh_enable(); if (last) nf_ct_put(last); @@ -966,7 +977,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } -#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -984,9 +994,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, [CTA_LABELS] = { .type = NLA_BINARY, - .len = __CTA_LABELS_MAX_LENGTH }, + .len = NF_CT_LABELS_MAX_SIZE }, [CTA_LABELS_MASK] = { .type = NLA_BINARY, - .len = __CTA_LABELS_MAX_LENGTH }, + .len = NF_CT_LABELS_MAX_SIZE }, }; static int @@ -1138,50 +1148,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb) } static int -ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, - struct hlist_nulls_head *list) +ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { - struct nf_conn *ct, *last; + struct nf_conn *ct, *last = NULL; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; int res; + int cpu; + struct hlist_nulls_head *list; + struct net *net = sock_net(skb->sk); if (cb->args[2]) return 0; - spin_lock_bh(&nf_conntrack_lock); - last = (struct nf_conn *)cb->args[1]; -restart: - hlist_nulls_for_each_entry(h, n, list, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && nf_ct_l3num(ct) != l3proto) + if (cb->args[0] == nr_cpu_ids) + return 0; + + for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { + struct ct_pcpu *pcpu; + + if (!cpu_possible(cpu)) continue; - if (cb->args[1]) { - if (ct != last) + + pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + spin_lock_bh(&pcpu->lock); + last = (struct nf_conn *)cb->args[1]; + list = dying ? &pcpu->dying : &pcpu->unconfirmed; +restart: + hlist_nulls_for_each_entry(h, n, list, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && nf_ct_l3num(ct) != l3proto) continue; - cb->args[1] = 0; - } - rcu_read_lock(); - res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct); - rcu_read_unlock(); - if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; - goto out; + if (cb->args[1]) { + if (ct != last) + continue; + cb->args[1] = 0; + } + rcu_read_lock(); + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct); + rcu_read_unlock(); + if (res < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; + spin_unlock_bh(&pcpu->lock); + goto out; + } } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } else + cb->args[2] = 1; + spin_unlock_bh(&pcpu->lock); } - if (cb->args[1]) { - cb->args[1] = 0; - goto restart; - } else - cb->args[2] = 1; out: - spin_unlock_bh(&nf_conntrack_lock); if (last) nf_ct_put(last); @@ -1191,9 +1216,7 @@ out: static int ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - - return ctnetlink_dump_list(skb, cb, &net->ct.dying); + return ctnetlink_dump_list(skb, cb, true); } static int @@ -1215,9 +1238,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - - return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed); + return ctnetlink_dump_list(skb, cb, false); } static int @@ -1310,27 +1331,22 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) +ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; - if (cda[CTA_NAT_DST]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_DST, - cda[CTA_NAT_DST]); - if (ret < 0) - return ret; - } - if (cda[CTA_NAT_SRC]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_SRC, - cda[CTA_NAT_SRC]); - if (ret < 0) - return ret; - } - return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, + cda[CTA_NAT_DST]); + if (ret < 0) + return ret; + + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); + return ret; #else + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; return -EOPNOTSUPP; #endif } @@ -1366,14 +1382,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); return -EOPNOTSUPP; } - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); if (helper) @@ -1659,11 +1675,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = ctnetlink_change_nat(ct, cda); - if (err < 0) - goto err2; - } + err = ctnetlink_setup_nat(ct, cda); + if (err < 0) + goto err2; nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); @@ -1811,9 +1825,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -EEXIST; ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); err = ctnetlink_change_conntrack(ct, cda); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); if (err == 0) { nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | @@ -2142,9 +2156,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) if (ret < 0) return ret; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); return ret; } @@ -2699,13 +2713,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); /* have to put what we 'get' above. * after this line usage count == 0 */ nf_ct_expect_put(exp); @@ -2714,7 +2728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conn_help *m_help; /* delete all expectations for this helper */ - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], @@ -2729,10 +2743,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } } } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } else { /* This basically means we have to flush everything*/ - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], @@ -2745,7 +2759,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } } } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } return 0; @@ -2971,11 +2985,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); exp = __nf_ct_expect_find(net, zone, &tuple); if (!exp) { - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, @@ -2989,7 +3003,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) err = ctnetlink_change_expect(exp, cda); - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); return err; } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7bd03decd36..825c3e3f830 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -605,32 +605,14 @@ static struct nf_conntrack_helper pptp __read_mostly = { .expect_policy = &pptp_exp_policy, }; -static void nf_conntrack_pptp_net_exit(struct net *net) -{ - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations nf_conntrack_pptp_net_ops = { - .exit = nf_conntrack_pptp_net_exit, -}; - static int __init nf_conntrack_pptp_init(void) { - int rv; - - rv = nf_conntrack_helper_register(&pptp); - if (rv < 0) - return rv; - rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops); - if (rv < 0) - nf_conntrack_helper_unregister(&pptp); - return rv; + return nf_conntrack_helper_register(&pptp); } static void __exit nf_conntrack_pptp_fini(void) { nf_conntrack_helper_unregister(&pptp); - unregister_pernet_subsys(&nf_conntrack_pptp_net_ops); } module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9d9c0dade60..d5665739e3b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -66,7 +66,7 @@ static inline struct netns_proto_gre *gre_pernet(struct net *net) return net_generic(net, proto_gre_net_id); } -void nf_ct_gre_keymap_flush(struct net *net) +static void nf_ct_gre_keymap_flush(struct net *net) { struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; @@ -78,7 +78,6 @@ void nf_ct_gre_keymap_flush(struct net *net) } write_unlock_bh(&net_gre->keymap_lock); } -EXPORT_SYMBOL(nf_ct_gre_keymap_flush); static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 466410eaa48..4c3ba1c8d68 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, struct hlist_node *next; int found = 0; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || @@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, found = 1; break; } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); return found; } @@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) struct nf_conntrack_expect *exp; struct hlist_node *next; - spin_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_expect_lock); hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; @@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) if (!media) break; } - spin_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_expect_lock); } static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d3f5cd6dd96..52ca952b802 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,15 +432,15 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); -unsigned int -nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +static unsigned int +__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip) { /* Force range to this IP; let proto decide mapping for * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). * Use reply in case it's already been mangled (eg local packet). */ union nf_inet_addr ip = - (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + (manip == NF_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); struct nf_nat_range range = { @@ -448,7 +448,13 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) .min_addr = ip, .max_addr = ip, }; - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &range, manip); +} + +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum)); } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); @@ -702,9 +708,9 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) + const struct nf_conn *ct, struct nf_nat_range *range, + const struct nf_nat_l3proto *l3proto) { - const struct nf_nat_l3proto *l3proto; struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -714,38 +720,46 @@ nfnetlink_parse_nat(const struct nlattr *nat, if (err < 0) return err; - rcu_read_lock(); - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - if (l3proto == NULL) { - err = -EAGAIN; - goto out; - } err = l3proto->nlattr_to_range(tb, range); if (err < 0) - goto out; + return err; if (!tb[CTA_NAT_PROTO]) - goto out; + return 0; - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); -out: - rcu_read_unlock(); - return err; + return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); } +/* This function is called under rcu_read_lock() */ static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { struct nf_nat_range range; + const struct nf_nat_l3proto *l3proto; int err; - err = nfnetlink_parse_nat(attr, ct, &range); + /* Should not happen, restricted to creating new conntracks + * via ctnetlink. + */ + if (WARN_ON_ONCE(nf_nat_initialized(ct, manip))) + return -EEXIST; + + /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to + * attach the null binding, otherwise this may oops. + */ + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) + return -EAGAIN; + + /* No NAT information has been passed, allocate the null-binding */ + if (attr == NULL) + return __nf_nat_alloc_null_binding(ct, manip); + + err = nfnetlink_parse_nat(attr, ct, &range, l3proto); if (err < 0) return err; - if (nf_nat_initialized(ct, manip)) - return -EEXIST; return nf_nat_setup_info(ct, &range, manip); } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 9858e3e51a3..52e20c9a46a 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) goto err2; if (!nfct_synproxy_ext_add(ct)) goto err2; - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_tmpl_insert(net, ct); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); @@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - nf_conntrack_free(snet->tmpl); + nf_ct_put(snet->tmpl); synproxy_proc_exit(net); free_percpu(snet->stats); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 117bbaaddde..3fd159db9f0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -152,8 +152,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, #ifdef CONFIG_MODULES if (autoload) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-chain-%u-%*.s", afi->family, - nla_len(nla)-1, (const char *)nla_data(nla)); + request_module("nft-chain-%u-%.*s", afi->family, + nla_len(nla), (const char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); type = __nf_tables_chain_type_lookup(afi->family, nla); if (type != NULL) @@ -794,9 +794,8 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); if (chain->stats) { - /* nfnl_lock is held, add some nfnl function for this, later */ struct nft_stats __percpu *oldstats = - rcu_dereference_protected(chain->stats, 1); + nft_dereference(chain->stats); rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); @@ -1008,10 +1007,8 @@ notify: return 0; } -static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +static void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); - BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { @@ -1045,7 +1042,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); - if (!list_empty(&chain->rules)) + if (!list_empty(&chain->rules) || chain->use > 0) return -EBUSY; list_del(&chain->list); @@ -1059,7 +1056,9 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, family); /* Make sure all rule references are gone before this is released */ - call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + synchronize_rcu(); + + nf_tables_chain_destroy(chain); return 0; } @@ -1114,35 +1113,45 @@ void nft_unregister_expr(struct nft_expr_type *type) } EXPORT_SYMBOL_GPL(nft_unregister_expr); -static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) +static const struct nft_expr_type *__nft_expr_type_get(u8 family, + struct nlattr *nla) { const struct nft_expr_type *type; list_for_each_entry(type, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, type->name)) + if (!nla_strcmp(nla, type->name) && + (!type->family || type->family == family)) return type; } return NULL; } -static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) +static const struct nft_expr_type *nft_expr_type_get(u8 family, + struct nlattr *nla) { const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); - type = __nft_expr_type_get(nla); + type = __nft_expr_type_get(family, nla); if (type != NULL && try_module_get(type->owner)) return type; #ifdef CONFIG_MODULES if (type == NULL) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%u-%.*s", family, + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_type_get(family, nla)) + return ERR_PTR(-EAGAIN); + + nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (__nft_expr_type_get(nla)) + if (__nft_expr_type_get(family, nla)) return ERR_PTR(-EAGAIN); } #endif @@ -1193,7 +1202,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, if (err < 0) return err; - type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]); if (IS_ERR(type)) return PTR_ERR(type); @@ -1244,10 +1253,11 @@ err1: return err; } -static void nf_tables_expr_destroy(struct nft_expr *expr) +static void nf_tables_expr_destroy(const struct nft_ctx *ctx, + struct nft_expr *expr) { if (expr->ops->destroy) - expr->ops->destroy(expr); + expr->ops->destroy(ctx, expr); module_put(expr->ops->type->owner); } @@ -1286,6 +1296,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_POSITION] = { .type = NLA_U64 }, + [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1338,6 +1350,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, } nla_nest_end(skb, list); + if (rule->ulen && + nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -1521,9 +1537,9 @@ err: return err; } -static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +static void nf_tables_rule_destroy(const struct nft_ctx *ctx, + struct nft_rule *rule) { - struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); struct nft_expr *expr; /* @@ -1532,23 +1548,18 @@ static void nf_tables_rcu_rule_destroy(struct rcu_head *head) */ expr = nft_expr_first(rule); while (expr->ops && expr != nft_expr_last(rule)) { - nf_tables_expr_destroy(expr); + nf_tables_expr_destroy(ctx, expr); expr = nft_expr_next(expr); } kfree(rule); } -static void nf_tables_rule_destroy(struct nft_rule *rule) -{ - call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); -} - #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; static struct nft_rule_trans * -nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_rule_trans *rupd; @@ -1556,11 +1567,8 @@ nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) if (rupd == NULL) return NULL; - rupd->chain = ctx->chain; - rupd->table = ctx->table; + rupd->ctx = *ctx; rupd->rule = rule; - rupd->family = ctx->afi->family; - rupd->nlh = ctx->nlh; list_add_tail(&rupd->list, &ctx->net->nft.commit_list); return rupd; @@ -1580,7 +1588,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; - unsigned int size, i, n; + unsigned int size, i, n, ulen = 0; int err, rem; bool create; u64 handle, pos_handle; @@ -1646,8 +1654,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } } + if (nla[NFTA_RULE_USERDATA]) + ulen = nla_len(nla[NFTA_RULE_USERDATA]); + err = -ENOMEM; - rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL); if (rule == NULL) goto err1; @@ -1655,6 +1666,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; + rule->ulen = ulen; + + if (ulen) + nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { @@ -1667,7 +1682,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - repl = nf_tables_trans_add(old_rule, &ctx); + repl = nf_tables_trans_add(&ctx, old_rule); if (repl == NULL) { err = -ENOMEM; goto err2; @@ -1690,7 +1705,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - if (nf_tables_trans_add(rule, &ctx) == NULL) { + if (nf_tables_trans_add(&ctx, rule) == NULL) { err = -ENOMEM; goto err3; } @@ -1705,7 +1720,7 @@ err3: kfree(repl); } err2: - nf_tables_rule_destroy(rule); + nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) @@ -1719,7 +1734,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_rule_is_active_next(ctx->net, rule)) { - if (nf_tables_trans_add(rule, ctx) == NULL) + if (nf_tables_trans_add(ctx, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); return 0; @@ -1809,29 +1824,36 @@ static int nf_tables_commit(struct sk_buff *skb) synchronize_rcu(); list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* Delete this rule from the dirty list */ - list_del(&rupd->list); - /* This rule was inactive in the past and just became active. * Clear the next bit of the genmask since its meaning has * changed, now it is the future. */ if (nft_rule_is_active(net, rupd->rule)) { nft_rule_clear(net, rupd->rule); - nf_tables_rule_notify(skb, rupd->nlh, rupd->table, - rupd->chain, rupd->rule, - NFT_MSG_NEWRULE, 0, - rupd->family); + nf_tables_rule_notify(skb, rupd->ctx.nlh, + rupd->ctx.table, rupd->ctx.chain, + rupd->rule, NFT_MSG_NEWRULE, 0, + rupd->ctx.afi->family); + list_del(&rupd->list); kfree(rupd); continue; } /* This rule is in the past, get rid of it */ list_del_rcu(&rupd->rule->list); - nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + nf_tables_rule_notify(skb, rupd->ctx.nlh, + rupd->ctx.table, rupd->ctx.chain, rupd->rule, NFT_MSG_DELRULE, 0, - rupd->family); - nf_tables_rule_destroy(rupd->rule); + rupd->ctx.afi->family); + } + + /* Make sure we don't see any packet traversing old rules */ + synchronize_rcu(); + + /* Now we can safely release unused old rules */ + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + nf_tables_rule_destroy(&rupd->ctx, rupd->rule); + list_del(&rupd->list); kfree(rupd); } @@ -1844,20 +1866,26 @@ static int nf_tables_abort(struct sk_buff *skb) struct nft_rule_trans *rupd, *tmp; list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* Delete all rules from the dirty list */ - list_del(&rupd->list); - if (!nft_rule_is_active_next(net, rupd->rule)) { nft_rule_clear(net, rupd->rule); + list_del(&rupd->list); kfree(rupd); continue; } /* This rule is inactive, get rid of it */ list_del_rcu(&rupd->rule->list); - nf_tables_rule_destroy(rupd->rule); + } + + /* Make sure we don't see any packet accessing aborted rules */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + nf_tables_rule_destroy(&rupd->ctx, rupd->rule); + list_del(&rupd->list); kfree(rupd); } + return 0; } @@ -1918,7 +1946,8 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_TABLE] = { .type = NLA_STRING }, - [NFTA_SET_NAME] = { .type = NLA_STRING }, + [NFTA_SET_NAME] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, [NFTA_SET_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, @@ -1943,6 +1972,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, } if (nla[NFTA_SET_TABLE] != NULL) { + if (afi == NULL) + return -EAFNOSUPPORT; + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1989,13 +2021,13 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (!sscanf(i->name, name, &tmp)) continue; - if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE) continue; set_bit(tmp, inuse); } - n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); free_page((unsigned long)inuse); } @@ -2411,8 +2443,7 @@ err1: static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { list_del(&set->list); - if (!(set->flags & NFT_SET_ANONYMOUS)) - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); set->ops->destroy(set); module_put(set->ops->owner); @@ -2428,6 +2459,8 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; @@ -2435,9 +2468,6 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - if (nfmsg->nfgen_family == NFPROTO_UNSPEC) - return -EAFNOSUPPORT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2723,6 +2753,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; + if (nla[NFTA_SET_ELEM_DATA] != NULL && + elem.flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; @@ -2977,6 +3010,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, const struct nft_set_iter *iter, const struct nft_set_elem *elem) { + if (elem->flags & NFT_SET_ELEM_INTERVAL_END) + return 0; + switch (elem->data.verdict) { case NFT_JUMP: case NFT_GOTO: @@ -3151,9 +3187,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: + default: + switch (data->verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; + default: + return -EINVAL; + } + /* fall through */ case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -3174,8 +3217,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->chain = chain; desc->len = sizeof(data); break; - default: - return -EINVAL; } desc->type = NFT_DATA_VERDICT; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 0d879fcb876..804105391b9 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask; + u32 mask = nft_cmp_fast_mask(priv->len); - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); if ((data[priv->sreg].data[0] & mask) == priv->data) return; data[NFT_REG_VERDICT].verdict = NFT_BREAK; @@ -103,9 +102,9 @@ static struct nf_loginfo trace_loginfo = { }, }; -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 046aa13b4fe..e8138da4c14 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id) } EXPORT_SYMBOL_GPL(nfnl_unlock); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_nfnl_is_held(u8 subsys_id) +{ + return lockdep_is_held(&table[subsys_id].mutex); +} +EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); +#endif + int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { nfnl_lock(n->subsys_id); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index a155d19a225..d292c8d286e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -28,8 +28,6 @@ #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/list.h> -#include <linux/jhash.h> -#include <linux/random.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> @@ -75,7 +73,6 @@ struct nfulnl_instance { }; #define INSTANCE_BUCKETS 16 -static unsigned int hash_init; static int nfnl_log_net_id __read_mostly; @@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void) { int status = -ENOMEM; - /* it's not really all that important to have a random value, so - * we can do this from the init function, even if there hasn't - * been that much entropy yet */ - get_random_bytes(&hash_init, sizeof(hash_init)); - netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index f072fe80351..108120f216b 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -354,13 +354,16 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); - if (!skb) + if (!skb) { + skb_tx_error(entskb); return NULL; + } nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); if (!nlh) { + skb_tx_error(entskb); kfree_skb(skb); return NULL; } @@ -488,13 +491,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - skb_zerocopy(skb, entskb, data_len, hlen); + if (skb_zerocopy(skb, entskb, data_len, hlen)) + goto nla_put_failure; } nlh->nlmsg_len = skb->len; return skb; nla_put_failure: + skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 954925db414..e2b3f51c81f 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, BUG_ON(err < 0); desc.len *= BITS_PER_BYTE; - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & mask; priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 82cb8236f8a..8a779be832f 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -192,7 +192,7 @@ err: } static void -nft_target_destroy(const struct nft_expr *expr) +nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; @@ -379,7 +379,7 @@ err: } static void -nft_match_destroy(const struct nft_expr *expr) +nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_match *match = expr->ops->data; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 917052e2060..bd0d41e6934 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -19,15 +19,15 @@ #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_labels.h> struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; - union{ + union { enum nft_registers dreg:8; enum nft_registers sreg:8; }; - uint8_t family; }; static void nft_ct_get_eval(const struct nft_expr *expr, @@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, goto err; strncpy((char *)dest->data, helper->name, sizeof(dest->data)); return; +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: { + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int size; + + if (!labels) { + memset(dest->data, 0, sizeof(dest->data)); + return; + } + + BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data)); + size = labels->words * sizeof(long); + + memcpy(dest->data, labels->bits, size); + if (size < sizeof(dest->data)) + memset(((char *) dest->data) + size, 0, + sizeof(dest->data) - size); + return; + } +#endif } tuple = &ct->tuplehash[priv->dir].tuple; @@ -221,11 +241,15 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, #ifdef CONFIG_NF_CONNTRACK_SECMARK case NFT_CT_SECMARK: #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: +#endif case NFT_CT_EXPIRATION: case NFT_CT_HELPER: if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; break; + case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: @@ -291,16 +315,13 @@ static int nft_ct_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->family = ctx->afi->family; - return 0; } -static void nft_ct_destroy(const struct nft_expr *expr) +static void nft_ct_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { - struct nft_ct *priv = nft_expr_priv(expr); - - nft_ct_l3proto_module_put(priv->family); + nft_ct_l3proto_module_put(ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -311,8 +332,19 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) - goto nla_put_failure; + + switch (priv->key) { + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + default: + break; + } + return 0; nla_put_failure: diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3d3f8fce10a..3b1ad876d6b 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,21 +14,34 @@ #include <linux/list.h> #include <linux/jhash.h> #include <linux/netlink.h> +#include <linux/vmalloc.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#define NFT_HASH_MIN_SIZE 4 + struct nft_hash { - struct hlist_head *hash; - unsigned int hsize; + struct nft_hash_table __rcu *tbl; +}; + +struct nft_hash_table { + unsigned int size; + unsigned int elements; + struct nft_hash_elem __rcu *buckets[]; }; struct nft_hash_elem { - struct hlist_node hnode; - struct nft_data key; - struct nft_data data[]; + struct nft_hash_elem __rcu *next; + struct nft_data key; + struct nft_data data[]; }; +#define nft_hash_for_each_entry(i, head) \ + for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) +#define nft_hash_for_each_entry_rcu(i, head) \ + for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) + static u32 nft_hash_rnd __read_mostly; static bool nft_hash_rnd_initted __read_mostly; @@ -38,7 +51,7 @@ static unsigned int nft_hash_data(const struct nft_data *data, unsigned int h; h = jhash(data->data, len, nft_hash_rnd); - return ((u64)h * hsize) >> 32; + return h & (hsize - 1); } static bool nft_hash_lookup(const struct nft_set *set, @@ -46,11 +59,12 @@ static bool nft_hash_lookup(const struct nft_set *set, struct nft_data *data) { const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); const struct nft_hash_elem *he; unsigned int h; - h = nft_hash_data(key, priv->hsize, set->klen); - hlist_for_each_entry(he, &priv->hash[h], hnode) { + h = nft_hash_data(key, tbl->size, set->klen); + nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { if (nft_data_cmp(&he->key, key, set->klen)) continue; if (set->flags & NFT_SET_MAP) @@ -60,19 +74,148 @@ static bool nft_hash_lookup(const struct nft_set *set, return false; } -static void nft_hash_elem_destroy(const struct nft_set *set, - struct nft_hash_elem *he) +static void nft_hash_tbl_free(const struct nft_hash_table *tbl) { - nft_data_uninit(&he->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(he->data, set->dtype); - kfree(he); + if (is_vmalloc_addr(tbl)) + vfree(tbl); + else + kfree(tbl); +} + +static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) +{ + struct nft_hash_table *tbl; + size_t size; + + size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); + tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); + if (tbl == NULL) + tbl = vzalloc(size); + if (tbl == NULL) + return NULL; + tbl->size = nbuckets; + + return tbl; +} + +static void nft_hash_chain_unzip(const struct nft_set *set, + const struct nft_hash_table *ntbl, + struct nft_hash_table *tbl, unsigned int n) +{ + struct nft_hash_elem *he, *last, *next; + unsigned int h; + + he = nft_dereference(tbl->buckets[n]); + if (he == NULL) + return; + h = nft_hash_data(&he->key, ntbl->size, set->klen); + + /* Find last element of first chain hashing to bucket h */ + last = he; + nft_hash_for_each_entry(he, he->next) { + if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) + break; + last = he; + } + + /* Unlink first chain from the old table */ + RCU_INIT_POINTER(tbl->buckets[n], last->next); + + /* If end of chain reached, done */ + if (he == NULL) + return; + + /* Find first element of second chain hashing to bucket h */ + next = NULL; + nft_hash_for_each_entry(he, he->next) { + if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) + continue; + next = he; + break; + } + + /* Link the two chains */ + RCU_INIT_POINTER(last->next, next); +} + +static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) +{ + struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; + struct nft_hash_elem *he; + unsigned int i, h; + bool complete; + + ntbl = nft_hash_tbl_alloc(tbl->size * 2); + if (ntbl == NULL) + return -ENOMEM; + + /* Link new table's buckets to first element in the old table + * hashing to the new bucket. + */ + for (i = 0; i < ntbl->size; i++) { + h = i < tbl->size ? i : i - tbl->size; + nft_hash_for_each_entry(he, tbl->buckets[h]) { + if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) + continue; + RCU_INIT_POINTER(ntbl->buckets[i], he); + break; + } + } + ntbl->elements = tbl->elements; + + /* Publish new table */ + rcu_assign_pointer(priv->tbl, ntbl); + + /* Unzip interleaved hash chains */ + do { + /* Wait for readers to use new table/unzipped chains */ + synchronize_rcu(); + + complete = true; + for (i = 0; i < tbl->size; i++) { + nft_hash_chain_unzip(set, ntbl, tbl, i); + if (tbl->buckets[i] != NULL) + complete = false; + } + } while (!complete); + + nft_hash_tbl_free(tbl); + return 0; +} + +static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) +{ + struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; + struct nft_hash_elem __rcu **pprev; + unsigned int i; + + ntbl = nft_hash_tbl_alloc(tbl->size / 2); + if (ntbl == NULL) + return -ENOMEM; + + for (i = 0; i < ntbl->size; i++) { + ntbl->buckets[i] = tbl->buckets[i]; + + for (pprev = &ntbl->buckets[i]; *pprev != NULL; + pprev = &nft_dereference(*pprev)->next) + ; + RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + } + ntbl->elements = tbl->elements; + + /* Publish new table */ + rcu_assign_pointer(priv->tbl, ntbl); + synchronize_rcu(); + + nft_hash_tbl_free(tbl); + return 0; } static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_table *tbl = nft_dereference(priv->tbl); struct nft_hash_elem *he; unsigned int size, h; @@ -91,33 +234,66 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - h = nft_hash_data(&he->key, priv->hsize, set->klen); - hlist_add_head_rcu(&he->hnode, &priv->hash[h]); + h = nft_hash_data(&he->key, tbl->size, set->klen); + RCU_INIT_POINTER(he->next, tbl->buckets[h]); + rcu_assign_pointer(tbl->buckets[h], he); + tbl->elements++; + + /* Expand table when exceeding 75% load */ + if (tbl->elements > tbl->size / 4 * 3) + nft_hash_tbl_expand(set, priv); + return 0; } +static void nft_hash_elem_destroy(const struct nft_set *set, + struct nft_hash_elem *he) +{ + nft_data_uninit(&he->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(he->data, set->dtype); + kfree(he); +} + static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->cookie; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_table *tbl = nft_dereference(priv->tbl); + struct nft_hash_elem *he, __rcu **pprev; - hlist_del_rcu(&he->hnode); + pprev = elem->cookie; + he = nft_dereference((*pprev)); + + RCU_INIT_POINTER(*pprev, he->next); + synchronize_rcu(); kfree(he); + tbl->elements--; + + /* Shrink table beneath 30% load */ + if (tbl->elements < tbl->size * 3 / 10 && + tbl->size > NFT_HASH_MIN_SIZE) + nft_hash_tbl_shrink(set, priv); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + struct nft_hash_elem __rcu * const *pprev; struct nft_hash_elem *he; unsigned int h; - h = nft_hash_data(&elem->key, priv->hsize, set->klen); - hlist_for_each_entry(he, &priv->hash[h], hnode) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) + h = nft_hash_data(&elem->key, tbl->size, set->klen); + pprev = &tbl->buckets[h]; + nft_hash_for_each_entry(he, tbl->buckets[h]) { + if (nft_data_cmp(&he->key, &elem->key, set->klen)) { + pprev = &he->next; continue; + } - elem->cookie = he; - elem->flags = 0; + elem->cookie = (void *)pprev; + elem->flags = 0; if (set->flags & NFT_SET_MAP) nft_data_copy(&elem->data, he->data); return 0; @@ -129,12 +305,13 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_table *tbl = nft_dereference(priv->tbl); const struct nft_hash_elem *he; struct nft_set_elem elem; unsigned int i; - for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry(he, &priv->hash[i], hnode) { + for (i = 0; i < tbl->size; i++) { + nft_hash_for_each_entry(he, tbl->buckets[i]) { if (iter->count < iter->skip) goto cont; @@ -161,43 +338,35 @@ static int nft_hash_init(const struct nft_set *set, const struct nlattr * const tb[]) { struct nft_hash *priv = nft_set_priv(set); - unsigned int cnt, i; + struct nft_hash_table *tbl; if (unlikely(!nft_hash_rnd_initted)) { get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - /* Aim for a load factor of 0.75 */ - // FIXME: temporarily broken until we have set descriptions - cnt = 100; - cnt = cnt * 4 / 3; - - priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); - if (priv->hash == NULL) + tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); + if (tbl == NULL) return -ENOMEM; - priv->hsize = cnt; - - for (i = 0; i < cnt; i++) - INIT_HLIST_HEAD(&priv->hash[i]); - + RCU_INIT_POINTER(priv->tbl, tbl); return 0; } static void nft_hash_destroy(const struct nft_set *set) { const struct nft_hash *priv = nft_set_priv(set); - const struct hlist_node *next; - struct nft_hash_elem *elem; + const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + struct nft_hash_elem *he, *next; unsigned int i; - for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { - hlist_del(&elem->hnode); - nft_hash_elem_destroy(set, elem); + for (i = 0; i < tbl->size; i++) { + for (he = nft_dereference(tbl->buckets[i]); he != NULL; + he = next) { + next = nft_dereference(he->next); + nft_hash_elem_destroy(set, he); } } - kfree(priv->hash); + kfree(tbl); } static struct nft_set_ops nft_hash_ops __read_mostly = { diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index f169501f1ad..810385eb724 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -70,7 +70,8 @@ err1: return err; } -static void nft_immediate_destroy(const struct nft_expr *expr) +static void nft_immediate_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 5af790123ad..10cfb156cdf 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -23,7 +23,6 @@ static const char *nft_log_null_prefix = ""; struct nft_log { struct nf_loginfo loginfo; char *prefix; - int family; }; static void nft_log_eval(const struct nft_expr *expr, @@ -33,7 +32,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } @@ -52,8 +51,6 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; - priv->family = ctx->afi->family; - nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); @@ -77,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx, return 0; } -static void nft_log_destroy(const struct nft_expr *expr) +static void nft_log_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { struct nft_log *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 8a6116b75b5..7fd2bea8aa2 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -16,6 +16,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_lookup { struct nft_set *set; @@ -88,11 +89,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return 0; } -static void nft_lookup_destroy(const struct nft_expr *expr) +static void nft_lookup_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(NULL, priv->set, &priv->binding); + nf_tables_unbind_set(ctx, priv->set, &priv->binding); } static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e8254ad2e5a..425cf39af89 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -116,7 +116,7 @@ static void nft_meta_get_eval(const struct nft_expr *expr, skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); break; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: { const struct dst_entry *dst = skb_dst(skb); @@ -199,7 +199,7 @@ static int nft_meta_init_validate_get(uint32_t key) case NFT_META_OIFTYPE: case NFT_META_SKUID: case NFT_META_SKGID: -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: #endif #ifdef CONFIG_NETWORK_SECMARK diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index d3b1ffe2618..a0195d28bcf 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -31,8 +31,8 @@ struct nft_nat { enum nft_registers sreg_addr_max:8; enum nft_registers sreg_proto_min:8; enum nft_registers sreg_proto_max:8; - int family; - enum nf_nat_manip_type type; + enum nf_nat_manip_type type:8; + u8 family; }; static void nft_nat_eval(const struct nft_expr *expr, @@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_nat *priv = nft_expr_priv(expr); + u32 family; int err; if (tb[NFTA_NAT_TYPE] == NULL) @@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; - priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (priv->family != AF_INET && priv->family != AF_INET6) - return -EINVAL; + family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); + if (family != AF_INET && family != AF_INET6) + return -EAFNOSUPPORT; + if (family != ctx->afi->family) + return -EOPNOTSUPP; + priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { priv->sreg_addr_min = ntohl(nla_get_be32( @@ -202,13 +206,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = { static int __init nft_nat_module_init(void) { - int err; - - err = nft_register_expr(&nft_nat_type); - if (err < 0) - return err; - - return 0; + return nft_register_expr(&nft_nat_type); } static void __exit nft_nat_module_exit(void) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index a2aeb318678..85daa84bfdf 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -135,7 +135,8 @@ nft_payload_select_ops(const struct nft_ctx *ctx, if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) return ERR_PTR(-EINVAL); - if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && + base != NFT_PAYLOAD_LL_HEADER) return &nft_payload_fast_ops; else return &nft_payload_ops; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index cbea473d69e..e8ae2f6bf23 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -25,7 +25,6 @@ struct nft_queue { u16 queuenum; u16 queues_total; u16 flags; - u8 family; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -43,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, priv->family, + priv->queues_total, pkt->ops->pf, jhash_initval); } } @@ -71,7 +70,6 @@ static int nft_queue_init(const struct nft_ctx *ctx, return -EINVAL; init_hashrandom(&jhash_initval); - priv->family = ctx->afi->family; priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); if (tb[NFTA_QUEUE_TOTAL] != NULL) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index ca0c1b231bf..e21d69d1350 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -69,8 +69,10 @@ static void nft_rbtree_elem_destroy(const struct nft_set *set, struct nft_rbtree_elem *rbe) { nft_data_uninit(&rbe->key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); } @@ -108,7 +110,8 @@ static int nft_rbtree_insert(const struct nft_set *set, int err; size = sizeof(*rbe); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) size += sizeof(rbe->data[0]); rbe = kzalloc(size, GFP_KERNEL); @@ -117,7 +120,8 @@ static int nft_rbtree_insert(const struct nft_set *set, rbe->flags = elem->flags; nft_data_copy(&rbe->key, &elem->key); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(rbe->data, &elem->data); err = __nft_rbtree_insert(set, rbe); @@ -153,7 +157,8 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) parent = parent->rb_right; else { elem->cookie = rbe; - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; return 0; @@ -177,7 +182,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_data_copy(&elem.key, &rbe->key); - if (set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP && + !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem.data, rbe->data); elem.flags = rbe->flags; diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 5e204711d70..f3448c29644 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -16,65 +16,23 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -#include <net/icmp.h> -#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/nft_reject.h> -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) -#include <net/netfilter/ipv6/nf_reject.h> -#endif - -struct nft_reject { - enum nft_reject_types type:8; - u8 icmp_code; - u8 family; -}; - -static void nft_reject_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - struct nft_reject *priv = nft_expr_priv(expr); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); -#endif - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - if (priv->family == NFPROTO_IPV4) - nf_send_unreach(pkt->skb, priv->icmp_code); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - else if (priv->family == NFPROTO_IPV6) - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); -#endif - break; - case NFT_REJECT_TCP_RST: - if (priv->family == NFPROTO_IPV4) - nf_send_reset(pkt->skb, pkt->ops->hooknum); -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - else if (priv->family == NFPROTO_IPV6) - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); -#endif - break; - } - - data[NFT_REG_VERDICT].verdict = NF_DROP; -} - -static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { +const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, }; +EXPORT_SYMBOL_GPL(nft_reject_policy); -static int nft_reject_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; - priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: @@ -89,8 +47,9 @@ static int nft_reject_init(const struct nft_ctx *ctx, return 0; } +EXPORT_SYMBOL_GPL(nft_reject_init); -static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_reject *priv = nft_expr_priv(expr); @@ -109,37 +68,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) nla_put_failure: return -1; } - -static struct nft_expr_type nft_reject_type; -static const struct nft_expr_ops nft_reject_ops = { - .type = &nft_reject_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), - .eval = nft_reject_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, -}; - -static struct nft_expr_type nft_reject_type __read_mostly = { - .name = "reject", - .ops = &nft_reject_ops, - .policy = nft_reject_policy, - .maxattr = NFTA_REJECT_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_reject_module_init(void) -{ - return nft_register_expr(&nft_reject_type); -} - -static void __exit nft_reject_module_exit(void) -{ - nft_unregister_expr(&nft_reject_type); -} - -module_init(nft_reject_module_init); -module_exit(nft_reject_module_exit); +EXPORT_SYMBOL_GPL(nft_reject_dump); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("reject"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c new file mode 100644 index 00000000000..b718a52a465 --- /dev/null +++ b/net/netfilter/nft_reject_inet.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2014 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> + +static void nft_reject_inet_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + switch (pkt->ops->pf) { + case NFPROTO_IPV4: + return nft_reject_ipv4_eval(expr, data, pkt); + case NFPROTO_IPV6: + return nft_reject_ipv6_eval(expr, data, pkt); + } +} + +static struct nft_expr_type nft_reject_inet_type; +static const struct nft_expr_ops nft_reject_inet_ops = { + .type = &nft_reject_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_inet_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "reject", + .ops = &nft_reject_inet_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_inet_module_init(void) +{ + return nft_register_expr(&nft_reject_inet_type); +} + +static void __exit nft_reject_inet_module_exit(void) +{ + nft_unregister_expr(&nft_reject_inet_type); +} + +module_init(nft_reject_inet_module_init); +module_exit(nft_reject_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 3228d7f24eb..4973cbddc44 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) if (par->family == NFPROTO_BRIDGE) { switch (eth_hdr(skb)->h_proto) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): audit_ip4(ab, skb); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): audit_ip6(ab, skb); break; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 5929be622c5..75747aecdeb 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err3; } - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); - - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &par->net->ct.tmpl); + nf_conntrack_tmpl_insert(par->net, ct); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 9a8e77e7f8d..f4e83300532 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -54,7 +54,8 @@ static struct xt_match cgroup_mt_reg __read_mostly = { .matchsize = sizeof(struct xt_cgroup_info), .me = THIS_MODULE, .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), }; static int __init cgroup_mt_init(void) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index c40b2695633..fbc66bb250d 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -19,6 +19,7 @@ #include <linux/jhash.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/rbtree.h> #include <linux/module.h> #include <linux/random.h> #include <linux/skbuff.h> @@ -31,6 +32,16 @@ #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> +#define CONNLIMIT_SLOTS 256U + +#ifdef CONFIG_LOCKDEP +#define CONNLIMIT_LOCK_SLOTS 8U +#else +#define CONNLIMIT_LOCK_SLOTS 256U +#endif + +#define CONNLIMIT_GC_MAX_NODES 8 + /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { struct hlist_node node; @@ -38,16 +49,27 @@ struct xt_connlimit_conn { union nf_inet_addr addr; }; +struct xt_connlimit_rb { + struct rb_node node; + struct hlist_head hhead; /* connections/hosts in same subnet */ + union nf_inet_addr addr; /* search key */ +}; + +static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; + struct xt_connlimit_data { - struct hlist_head iphash[256]; - spinlock_t lock; + struct rb_root climit_root4[CONNLIMIT_SLOTS]; + struct rb_root climit_root6[CONNLIMIT_SLOTS]; }; static u_int32_t connlimit_rnd __read_mostly; +static struct kmem_cache *connlimit_rb_cachep __read_mostly; +static struct kmem_cache *connlimit_conn_cachep __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { - return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; + return jhash_1word((__force __u32)addr, + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline unsigned int @@ -60,7 +82,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; + return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline bool already_closed(const struct nf_conn *conn) @@ -72,13 +95,14 @@ static inline bool already_closed(const struct nf_conn *conn) return 0; } -static inline unsigned int +static int same_source_net(const union nf_inet_addr *addr, const union nf_inet_addr *mask, const union nf_inet_addr *u3, u_int8_t family) { if (family == NFPROTO_IPV4) { - return (addr->ip & mask->ip) == (u3->ip & mask->ip); + return ntohl(addr->ip & mask->ip) - + ntohl(u3->ip & mask->ip); } else { union nf_inet_addr lh, rh; unsigned int i; @@ -88,89 +112,205 @@ same_source_net(const union nf_inet_addr *addr, rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; } - return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0; + return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); } } -static int count_them(struct net *net, - struct xt_connlimit_data *data, +static bool add_hlist(struct hlist_head *head, const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - const union nf_inet_addr *mask, - u_int8_t family) + const union nf_inet_addr *addr) +{ + struct xt_connlimit_conn *conn; + + conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return false; + conn->tuple = *tuple; + conn->addr = *addr; + hlist_add_head(&conn->node, head); + return true; +} + +static unsigned int check_hlist(struct net *net, + struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + bool *addit) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; struct hlist_node *n; struct nf_conn *found_ct; - struct hlist_head *hash; - bool addit = true; - int matches = 0; - - if (family == NFPROTO_IPV6) - hash = &data->iphash[connlimit_iphash6(addr, mask)]; - else - hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; + unsigned int length = 0; + *addit = true; rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, n, hash, node) { + hlist_for_each_entry_safe(conn, n, head, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); - found_ct = NULL; + if (found == NULL) { + hlist_del(&conn->node); + kmem_cache_free(connlimit_conn_cachep, conn); + continue; + } - if (found != NULL) - found_ct = nf_ct_tuplehash_to_ctrack(found); + found_ct = nf_ct_tuplehash_to_ctrack(found); - if (found_ct != NULL && - nf_ct_tuple_equal(&conn->tuple, tuple) && - !already_closed(found_ct)) + if (nf_ct_tuple_equal(&conn->tuple, tuple)) { /* * Just to be sure we have it only once in the list. * We should not see tuples twice unless someone hooks * this into a table without "-p tcp --syn". */ - addit = false; - - if (found == NULL) { - /* this one is gone */ - hlist_del(&conn->node); - kfree(conn); - continue; - } - - if (already_closed(found_ct)) { + *addit = false; + } else if (already_closed(found_ct)) { /* * we do not care about connections which are * closed already -> ditch it */ nf_ct_put(found_ct); hlist_del(&conn->node); - kfree(conn); + kmem_cache_free(connlimit_conn_cachep, conn); continue; } - if (same_source_net(addr, mask, &conn->addr, family)) - /* same source network -> be counted! */ - ++matches; nf_ct_put(found_ct); + length++; } rcu_read_unlock(); - if (addit) { - /* save the new connection in our list */ - conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (conn == NULL) - return -ENOMEM; - conn->tuple = *tuple; - conn->addr = *addr; - hlist_add_head(&conn->node, hash); - ++matches; + return length; +} + +static void tree_nodes_free(struct rb_root *root, + struct xt_connlimit_rb *gc_nodes[], + unsigned int gc_count) +{ + struct xt_connlimit_rb *rbconn; + + while (gc_count) { + rbconn = gc_nodes[--gc_count]; + rb_erase(&rbconn->node, root); + kmem_cache_free(connlimit_rb_cachep, rbconn); + } +} + +static unsigned int +count_tree(struct net *net, struct rb_root *root, + const struct nf_conntrack_tuple *tuple, + const union nf_inet_addr *addr, const union nf_inet_addr *mask, + u8 family) +{ + struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; + struct rb_node **rbnode, *parent; + struct xt_connlimit_rb *rbconn; + struct xt_connlimit_conn *conn; + unsigned int gc_count; + bool no_gc = false; + + restart: + gc_count = 0; + parent = NULL; + rbnode = &(root->rb_node); + while (*rbnode) { + int diff; + bool addit; + + rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); + + parent = *rbnode; + diff = same_source_net(addr, mask, &rbconn->addr, family); + if (diff < 0) { + rbnode = &((*rbnode)->rb_left); + } else if (diff > 0) { + rbnode = &((*rbnode)->rb_right); + } else { + /* same source network -> be counted! */ + unsigned int count; + count = check_hlist(net, &rbconn->hhead, tuple, &addit); + + tree_nodes_free(root, gc_nodes, gc_count); + if (!addit) + return count; + + if (!add_hlist(&rbconn->hhead, tuple, addr)) + return 0; /* hotdrop */ + + return count + 1; + } + + if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) + continue; + + /* only used for GC on hhead, retval and 'addit' ignored */ + check_hlist(net, &rbconn->hhead, tuple, &addit); + if (hlist_empty(&rbconn->hhead)) + gc_nodes[gc_count++] = rbconn; + } + + if (gc_count) { + no_gc = true; + tree_nodes_free(root, gc_nodes, gc_count); + /* tree_node_free before new allocation permits + * allocator to re-use newly free'd object. + * + * This is a rare event; in most cases we will find + * existing node to re-use. (or gc_count is 0). + */ + goto restart; + } + + /* no match, need to insert new node */ + rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); + if (rbconn == NULL) + return 0; + + conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); + if (conn == NULL) { + kmem_cache_free(connlimit_rb_cachep, rbconn); + return 0; + } + + conn->tuple = *tuple; + conn->addr = *addr; + rbconn->addr = *addr; + + INIT_HLIST_HEAD(&rbconn->hhead); + hlist_add_head(&conn->node, &rbconn->hhead); + + rb_link_node(&rbconn->node, parent, rbnode); + rb_insert_color(&rbconn->node, root); + return 1; +} + +static int count_them(struct net *net, + struct xt_connlimit_data *data, + const struct nf_conntrack_tuple *tuple, + const union nf_inet_addr *addr, + const union nf_inet_addr *mask, + u_int8_t family) +{ + struct rb_root *root; + int count; + u32 hash; + + if (family == NFPROTO_IPV6) { + hash = connlimit_iphash6(addr, mask); + root = &data->climit_root6[hash]; + } else { + hash = connlimit_iphash(addr->ip & mask->ip); + root = &data->climit_root4[hash]; } - return matches; + spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); + + count = count_tree(net, root, tuple, addr, mask, family); + + spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); + + return count; } static bool @@ -183,7 +323,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conntrack_tuple *tuple_ptr = &tuple; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; - int connections; + unsigned int connections; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) @@ -202,12 +342,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) iph->daddr : iph->saddr; } - spin_lock_bh(&info->data->lock); connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); - spin_unlock_bh(&info->data->lock); - - if (connections < 0) + if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; @@ -247,29 +384,44 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return -ENOMEM; } - spin_lock_init(&info->data->lock); - for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) - INIT_HLIST_HEAD(&info->data->iphash[i]); + for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) + info->data->climit_root4[i] = RB_ROOT; + for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) + info->data->climit_root6[i] = RB_ROOT; return 0; } -static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) +static void destroy_tree(struct rb_root *r) { - const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; + struct xt_connlimit_rb *rbconn; struct hlist_node *n; - struct hlist_head *hash = info->data->iphash; + struct rb_node *node; + + while ((node = rb_first(r)) != NULL) { + rbconn = container_of(node, struct xt_connlimit_rb, node); + + rb_erase(node, r); + + hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) + kmem_cache_free(connlimit_conn_cachep, conn); + + kmem_cache_free(connlimit_rb_cachep, rbconn); + } +} + +static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_connlimit_info *info = par->matchinfo; unsigned int i; nf_ct_l3proto_module_put(par->family); - for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - hlist_for_each_entry_safe(conn, n, &hash[i], node) { - hlist_del(&conn->node); - kfree(conn); - } - } + for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) + destroy_tree(&info->data->climit_root4[i]); + for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) + destroy_tree(&info->data->climit_root6[i]); kfree(info->data); } @@ -287,12 +439,40 @@ static struct xt_match connlimit_mt_reg __read_mostly = { static int __init connlimit_mt_init(void) { - return xt_register_match(&connlimit_mt_reg); + int ret, i; + + BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); + BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); + + for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i) + spin_lock_init(&xt_connlimit_locks[i]); + + connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", + sizeof(struct xt_connlimit_conn), + 0, 0, NULL); + if (!connlimit_conn_cachep) + return -ENOMEM; + + connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", + sizeof(struct xt_connlimit_rb), + 0, 0, NULL); + if (!connlimit_rb_cachep) { + kmem_cache_destroy(connlimit_conn_cachep); + return -ENOMEM; + } + ret = xt_register_match(&connlimit_mt_reg); + if (ret != 0) { + kmem_cache_destroy(connlimit_conn_cachep); + kmem_cache_destroy(connlimit_rb_cachep); + } + return ret; } static void __exit connlimit_mt_exit(void) { xt_unregister_match(&connlimit_mt_reg); + kmem_cache_destroy(connlimit_conn_cachep); + kmem_cache_destroy(connlimit_rb_cachep); } module_init(connlimit_mt_init); diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index a4c7561698c..89d53104c6b 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) } return spi_match(compinfo->spis[0], compinfo->spis[1], - ntohl(chdr->cpi << 16), + ntohs(chdr->cpi), !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 7174611bd67..c529161cdbf 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -422,4 +422,6 @@ module_exit(xt_osf_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Passive OS fingerprint matching."); +MODULE_ALIAS("ipt_osf"); +MODULE_ALIAS("ip6t_osf"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fdf51353cf7..894cda0206b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1460,7 +1460,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && nlk->groups[0]) { int i; - for (i=0; i<nlk->ngroups; i++) { + for (i = 0; i < nlk->ngroups; i++) { if (test_bit(i, nlk->groups)) nlk->netlink_bind(i); } @@ -1489,8 +1489,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family != AF_NETLINK) return -EINVAL; - /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + if ((nladdr->nl_groups || nladdr->nl_pid) && + !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -1653,7 +1653,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) else #endif /* CONFIG_NETLINK_MMAP */ skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, len); + sk->sk_data_ready(sk); return len; } @@ -2343,6 +2343,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif + /* Record the max length of recvmsg() calls for future allocations */ + nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); + nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, + 16384); + copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; @@ -2389,7 +2394,7 @@ out: return err ? : copied; } -static void netlink_data_ready(struct sock *sk, int len) +static void netlink_data_ready(struct sock *sk) { BUG(); } @@ -2549,7 +2554,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla struct nlmsghdr *nlh; int size = nlmsg_msg_size(len); - nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); + nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; @@ -2587,7 +2592,27 @@ static int netlink_dump(struct sock *sk) if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + + /* NLMSG_GOODSIZE is small to avoid high order allocations being + * required, but it makes sense to _attempt_ a 16K bytes allocation + * to reduce number of system calls on dump operations, if user + * ever provided a big enough buffer. + */ + if (alloc_size < nlk->max_recvmsg_len) { + skb = netlink_alloc_skb(sk, + nlk->max_recvmsg_len, + nlk->portid, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); + /* available room should be exact amount to avoid MSG_TRUNC */ + if (skb) + skb_reserve(skb, skb_tailroom(skb) - + nlk->max_recvmsg_len); + } + if (!skb) + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, + GFP_KERNEL); if (!skb) goto errout_skb; netlink_skb_set_owner_r(skb, sk); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index acbd774eeb7..ed13a790b00 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -31,6 +31,7 @@ struct netlink_sock { u32 ngroups; unsigned long *groups; unsigned long state; + size_t max_recvmsg_len; wait_queue_head_t wait; bool cb_running; struct netlink_callback cb; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index b74aa075552..ede50d197e1 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1011,7 +1011,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) skb_queue_head(&sk->sk_receive_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); bh_unlock_sock(sk); diff --git a/net/nfc/core.c b/net/nfc/core.c index ca1e65f4b13..819b87702b7 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -280,9 +280,6 @@ static struct nfc_target *nfc_find_target(struct nfc_dev *dev, u32 target_idx) { int i; - if (dev->n_targets == 0) - return NULL; - for (i = 0; i < dev->n_targets; i++) { if (dev->targets[i].idx == target_idx) return &dev->targets[i]; @@ -546,9 +543,9 @@ error: struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se, *n; + struct nfc_se *se; - list_for_each_entry_safe(se, n, &dev->secure_elements, list) + list_for_each_entry(se, &dev->secure_elements, list) if (se->idx == se_idx) return se; @@ -655,9 +652,6 @@ int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); - if (gb_len > NFC_MAX_GT_LEN) - return -EINVAL; - return nfc_llcp_set_remote_gb(dev, gb, gb_len); } EXPORT_SYMBOL(nfc_set_remote_general_bytes); diff --git a/net/nfc/digital.h b/net/nfc/digital.h index 08b29b55ea6..71ad7eefddd 100644 --- a/net/nfc/digital.h +++ b/net/nfc/digital.h @@ -71,7 +71,14 @@ static inline int digital_in_send_cmd(struct nfc_digital_dev *ddev, void digital_poll_next_tech(struct nfc_digital_dev *ddev); int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech); +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech); +int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech); + +int digital_in_iso_dep_pull_sod(struct nfc_digital_dev *ddev, + struct sk_buff *skb); +int digital_in_iso_dep_push_sod(struct nfc_digital_dev *ddev, + struct sk_buff *skb); int digital_target_found(struct nfc_digital_dev *ddev, struct nfc_target *target, u8 protocol); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index c129d1571ca..b105cfb00e7 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -22,9 +22,13 @@ #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK) +#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK + #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) +#define DIGITAL_PROTO_ISO15693_RF_TECH NFC_PROTO_ISO15693_MASK + struct digital_cmd { struct list_head queue; @@ -331,6 +335,24 @@ int digital_target_found(struct nfc_digital_dev *ddev, } break; + case NFC_PROTO_ISO15693: + framing = NFC_DIGITAL_FRAMING_ISO15693_T5T; + check_crc = digital_skb_check_crc_b; + add_crc = digital_skb_add_crc_b; + break; + + case NFC_PROTO_ISO14443: + framing = NFC_DIGITAL_FRAMING_NFCA_T4T; + check_crc = digital_skb_check_crc_a; + add_crc = digital_skb_add_crc_a; + break; + + case NFC_PROTO_ISO14443_B: + framing = NFC_DIGITAL_FRAMING_NFCB_T4T; + check_crc = digital_skb_check_crc_b; + add_crc = digital_skb_add_crc_b; + break; + default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; @@ -461,7 +483,11 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); - if (im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { + if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) + digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, + digital_in_send_sensb_req); + + if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); @@ -469,7 +495,11 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, digital_in_send_sensf_req); } - if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) { + if (matching_im_protocols & DIGITAL_PROTO_ISO15693_RF_TECH) + digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_ISO15693, + digital_in_send_iso15693_inv_req); + + if (matching_tm_protocols & NFC_PROTO_NFC_DEP_MASK) { if (ddev->ops->tg_listen_mdaa) { digital_add_poll_tech(ddev, 0, digital_tg_listen_mdaa); @@ -607,20 +637,31 @@ static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, if (IS_ERR(resp)) { rc = PTR_ERR(resp); + resp = NULL; goto done; } - if (ddev->curr_protocol == NFC_PROTO_MIFARE) + if (ddev->curr_protocol == NFC_PROTO_MIFARE) { rc = digital_in_recv_mifare_res(resp); - else - rc = ddev->skb_check_crc(resp); + /* crc check is done in digital_in_recv_mifare_res() */ + goto done; + } + + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { + rc = digital_in_iso_dep_pull_sod(ddev, resp); + if (rc) + goto done; + } + + rc = ddev->skb_check_crc(resp); +done: if (rc) { kfree_skb(resp); resp = NULL; } -done: data_exch->cb(data_exch->cb_context, resp, rc); kfree(data_exch); @@ -632,6 +673,7 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); struct digital_data_exch *data_exch; + int rc; data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL); if (!data_exch) { @@ -642,13 +684,28 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, data_exch->cb = cb; data_exch->cb_context = cb_context; - if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) - return digital_in_send_dep_req(ddev, target, skb, data_exch); + if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) { + rc = digital_in_send_dep_req(ddev, target, skb, data_exch); + goto exit; + } + + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { + rc = digital_in_iso_dep_push_sod(ddev, skb); + if (rc) + goto exit; + } ddev->skb_add_crc(skb); - return digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete, - data_exch); + rc = digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete, + data_exch); + +exit: + if (rc) + kfree(data_exch); + + return rc; } static struct nfc_ops digital_nfc_ops = { @@ -700,6 +757,12 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, ddev->protocols |= NFC_PROTO_FELICA_MASK; if (supported_protocols & NFC_PROTO_NFC_DEP_MASK) ddev->protocols |= NFC_PROTO_NFC_DEP_MASK; + if (supported_protocols & NFC_PROTO_ISO15693_MASK) + ddev->protocols |= NFC_PROTO_ISO15693_MASK; + if (supported_protocols & NFC_PROTO_ISO14443_MASK) + ddev->protocols |= NFC_PROTO_ISO14443_MASK; + if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) + ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 251c8c753eb..12a233e9ece 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -30,6 +30,7 @@ #define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04)) #define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60)) +#define DIGITAL_SEL_RES_IS_T4T(sel_res) ((sel_res) & 0x20) #define DIGITAL_SEL_RES_IS_NFC_DEP(sel_res) ((sel_res) & 0x40) #define DIGITAL_SENS_RES_IS_T1T(sens_res) (((sens_res) & 0x0C00) == 0x0C00) @@ -40,6 +41,24 @@ #define DIGITAL_MIFARE_READ_RES_LEN 16 #define DIGITAL_MIFARE_ACK_RES 0x0A +#define DIGITAL_CMD_SENSB_REQ 0x05 +#define DIGITAL_SENSB_ADVANCED BIT(5) +#define DIGITAL_SENSB_EXTENDED BIT(4) +#define DIGITAL_SENSB_ALLB_REQ BIT(3) +#define DIGITAL_SENSB_N(n) ((n) & 0x7) + +#define DIGITAL_CMD_SENSB_RES 0x50 + +#define DIGITAL_CMD_ATTRIB_REQ 0x1D +#define DIGITAL_ATTRIB_P1_TR0_DEFAULT (0x0 << 6) +#define DIGITAL_ATTRIB_P1_TR1_DEFAULT (0x0 << 4) +#define DIGITAL_ATTRIB_P1_SUPRESS_EOS BIT(3) +#define DIGITAL_ATTRIB_P1_SUPRESS_SOS BIT(2) +#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1 (0x0 << 6) +#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1 (0x0 << 4) +#define DIGITAL_ATTRIB_P2_MAX_FRAME_256 0x8 +#define DIGITAL_ATTRIB_P4_DID(n) ((n) & 0xf) + #define DIGITAL_CMD_SENSF_REQ 0x00 #define DIGITAL_CMD_SENSF_RES 0x01 @@ -51,6 +70,35 @@ #define DIGITAL_SENSF_REQ_RC_SC 1 #define DIGITAL_SENSF_REQ_RC_AP 2 +#define DIGITAL_CMD_ISO15693_INVENTORY_REQ 0x01 + +#define DIGITAL_ISO15693_REQ_FLAG_DATA_RATE BIT(1) +#define DIGITAL_ISO15693_REQ_FLAG_INVENTORY BIT(2) +#define DIGITAL_ISO15693_REQ_FLAG_NB_SLOTS BIT(5) +#define DIGITAL_ISO15693_RES_FLAG_ERROR BIT(0) +#define DIGITAL_ISO15693_RES_IS_VALID(flags) \ + (!((flags) & DIGITAL_ISO15693_RES_FLAG_ERROR)) + +#define DIGITAL_ISO_DEP_I_PCB 0x02 +#define DIGITAL_ISO_DEP_PNI(pni) ((pni) & 0x01) + +#define DIGITAL_ISO_DEP_PCB_TYPE(pcb) ((pcb) & 0xC0) + +#define DIGITAL_ISO_DEP_I_BLOCK 0x00 + +#define DIGITAL_ISO_DEP_BLOCK_HAS_DID(pcb) ((pcb) & 0x08) + +static const u8 digital_ats_fsc[] = { + 16, 24, 32, 40, 48, 64, 96, 128, +}; + +#define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F) +#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4) +#define DIGITAL_ATS_MAX_FSC 256 + +#define DIGITAL_RATS_BYTE1 0xE0 +#define DIGITAL_RATS_PARAM 0x80 + struct digital_sdd_res { u8 nfcid1[4]; u8 bcc; @@ -63,6 +111,32 @@ struct digital_sel_req { u8 bcc; } __packed; +struct digital_sensb_req { + u8 cmd; + u8 afi; + u8 param; +} __packed; + +struct digital_sensb_res { + u8 cmd; + u8 nfcid0[4]; + u8 app_data[4]; + u8 proto_info[3]; +} __packed; + +struct digital_attrib_req { + u8 cmd; + u8 nfcid0[4]; + u8 param1; + u8 param2; + u8 param3; + u8 param4; +} __packed; + +struct digital_attrib_res { + u8 mbli_did; +} __packed; + struct digital_sensf_req { u8 cmd; u8 sc1; @@ -82,9 +156,127 @@ struct digital_sensf_res { u8 rd[2]; } __packed; +struct digital_iso15693_inv_req { + u8 flags; + u8 cmd; + u8 mask_len; + u64 mask; +} __packed; + +struct digital_iso15693_inv_res { + u8 flags; + u8 dsfid; + u64 uid; +} __packed; + static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev, struct nfc_target *target); +int digital_in_iso_dep_pull_sod(struct nfc_digital_dev *ddev, + struct sk_buff *skb) +{ + u8 pcb; + u8 block_type; + + if (skb->len < 1) + return -EIO; + + pcb = *skb->data; + block_type = DIGITAL_ISO_DEP_PCB_TYPE(pcb); + + /* No support fo R-block nor S-block */ + if (block_type != DIGITAL_ISO_DEP_I_BLOCK) { + pr_err("ISO_DEP R-block and S-block not supported\n"); + return -EIO; + } + + if (DIGITAL_ISO_DEP_BLOCK_HAS_DID(pcb)) { + pr_err("DID field in ISO_DEP PCB not supported\n"); + return -EIO; + } + + skb_pull(skb, 1); + + return 0; +} + +int digital_in_iso_dep_push_sod(struct nfc_digital_dev *ddev, + struct sk_buff *skb) +{ + /* + * Chaining not supported so skb->len + 1 PCB byte + 2 CRC bytes must + * not be greater than remote FSC + */ + if (skb->len + 3 > ddev->target_fsc) + return -EIO; + + skb_push(skb, 1); + + *skb->data = DIGITAL_ISO_DEP_I_PCB | ddev->curr_nfc_dep_pni; + + ddev->curr_nfc_dep_pni = + DIGITAL_ISO_DEP_PNI(ddev->curr_nfc_dep_pni + 1); + + return 0; +} + +static void digital_in_recv_ats(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = arg; + u8 fsdi; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len < 2) { + rc = -EIO; + goto exit; + } + + fsdi = DIGITAL_ATS_FSCI(resp->data[1]); + if (fsdi >= 8) + ddev->target_fsc = DIGITAL_ATS_MAX_FSC; + else + ddev->target_fsc = digital_ats_fsc[fsdi]; + + ddev->curr_nfc_dep_pni = 0; + + rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443); + +exit: + dev_kfree_skb(resp); + kfree(target); + + if (rc) + digital_poll_next_tech(ddev); +} + +static int digital_in_send_rats(struct nfc_digital_dev *ddev, + struct nfc_target *target) +{ + int rc; + struct sk_buff *skb; + + skb = digital_skb_alloc(ddev, 2); + if (!skb) + return -ENOMEM; + + *skb_put(skb, 1) = DIGITAL_RATS_BYTE1; + *skb_put(skb, 1) = DIGITAL_RATS_PARAM; + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_ats, + target); + if (rc) + kfree_skb(skb); + + return rc; +} + static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { @@ -122,8 +314,19 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, goto exit_free_skb; } + target->sel_res = sel_res; + if (DIGITAL_SEL_RES_IS_T2T(sel_res)) { nfc_proto = NFC_PROTO_MIFARE; + } else if (DIGITAL_SEL_RES_IS_T4T(sel_res)) { + rc = digital_in_send_rats(ddev, target); + if (rc) + goto exit; + /* + * Skip target_found and don't free it for now. This will be + * done when receiving the ATS + */ + goto exit_free_skb; } else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) { nfc_proto = NFC_PROTO_NFC_DEP; } else { @@ -131,8 +334,6 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, goto exit; } - target->sel_res = sel_res; - rc = digital_target_found(ddev, target, nfc_proto); exit: @@ -375,6 +576,175 @@ int digital_in_recv_mifare_res(struct sk_buff *resp) return -EIO; } +static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = arg; + struct digital_attrib_res *attrib_res; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len < sizeof(*attrib_res)) { + PROTOCOL_ERR("12.6.2"); + rc = -EIO; + goto exit; + } + + attrib_res = (struct digital_attrib_res *)resp->data; + + if (attrib_res->mbli_did & 0x0f) { + PROTOCOL_ERR("12.6.2.1"); + rc = -EIO; + goto exit; + } + + rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B); + +exit: + dev_kfree_skb(resp); + kfree(target); + + if (rc) + digital_poll_next_tech(ddev); +} + +int digital_in_send_attrib_req(struct nfc_digital_dev *ddev, + struct nfc_target *target, + struct digital_sensb_res *sensb_res) +{ + struct digital_attrib_req *attrib_req; + struct sk_buff *skb; + int rc; + + skb = digital_skb_alloc(ddev, sizeof(*attrib_req)); + if (!skb) + return -ENOMEM; + + attrib_req = (struct digital_attrib_req *)skb_put(skb, + sizeof(*attrib_req)); + + attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ; + memcpy(attrib_req->nfcid0, sensb_res->nfcid0, + sizeof(attrib_req->nfcid0)); + attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT | + DIGITAL_ATTRIB_P1_TR1_DEFAULT; + attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 | + DIGITAL_ATTRIB_P2_POLL_LISTEN_1 | + DIGITAL_ATTRIB_P2_MAX_FRAME_256; + attrib_req->param3 = sensb_res->proto_info[1] & 0x07; + attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res, + target); + if (rc) + kfree_skb(skb); + + return rc; +} + +static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = NULL; + struct digital_sensb_res *sensb_res; + u8 fsci; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len != sizeof(*sensb_res)) { + PROTOCOL_ERR("5.6.2.1"); + rc = -EIO; + goto exit; + } + + sensb_res = (struct digital_sensb_res *)resp->data; + + if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) { + PROTOCOL_ERR("5.6.2"); + rc = -EIO; + goto exit; + } + + if (!(sensb_res->proto_info[1] & BIT(0))) { + PROTOCOL_ERR("5.6.2.12"); + rc = -EIO; + goto exit; + } + + if (sensb_res->proto_info[1] & BIT(3)) { + PROTOCOL_ERR("5.6.2.16"); + rc = -EIO; + goto exit; + } + + fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]); + if (fsci >= 8) + ddev->target_fsc = DIGITAL_ATS_MAX_FSC; + else + ddev->target_fsc = digital_ats_fsc[fsci]; + + target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL); + if (!target) { + rc = -ENOMEM; + goto exit; + } + + rc = digital_in_send_attrib_req(ddev, target, sensb_res); + +exit: + dev_kfree_skb(resp); + + if (rc) { + kfree(target); + digital_poll_next_tech(ddev); + } +} + +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + struct digital_sensb_req *sensb_req; + struct sk_buff *skb; + int rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, + NFC_DIGITAL_RF_TECH_106B); + if (rc) + return rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCB); + if (rc) + return rc; + + skb = digital_skb_alloc(ddev, sizeof(*sensb_req)); + if (!skb) + return -ENOMEM; + + sensb_req = (struct digital_sensb_req *)skb_put(skb, + sizeof(*sensb_req)); + + sensb_req->cmd = DIGITAL_CMD_SENSB_REQ; + sensb_req->afi = 0x00; /* All families and sub-families */ + sensb_req->param = DIGITAL_SENSB_N(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res, + NULL); + if (rc) + kfree_skb(skb); + + return rc; +} + static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { @@ -473,6 +843,93 @@ int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech) return rc; } +static void digital_in_recv_iso15693_inv_res(struct nfc_digital_dev *ddev, + void *arg, struct sk_buff *resp) +{ + struct digital_iso15693_inv_res *res; + struct nfc_target *target = NULL; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto out_free_skb; + } + + if (resp->len != sizeof(*res)) { + rc = -EIO; + goto out_free_skb; + } + + res = (struct digital_iso15693_inv_res *)resp->data; + + if (!DIGITAL_ISO15693_RES_IS_VALID(res->flags)) { + PROTOCOL_ERR("ISO15693 - 10.3.1"); + rc = -EINVAL; + goto out_free_skb; + } + + target = kzalloc(sizeof(*target), GFP_KERNEL); + if (!target) { + rc = -ENOMEM; + goto out_free_skb; + } + + target->is_iso15693 = 1; + target->iso15693_dsfid = res->dsfid; + memcpy(target->iso15693_uid, &res->uid, sizeof(target->iso15693_uid)); + + rc = digital_target_found(ddev, target, NFC_PROTO_ISO15693); + + kfree(target); + +out_free_skb: + dev_kfree_skb(resp); + + if (rc) + digital_poll_next_tech(ddev); +} + +int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + struct digital_iso15693_inv_req *req; + struct sk_buff *skb; + int rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, + NFC_DIGITAL_RF_TECH_ISO15693); + if (rc) + return rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_ISO15693_INVENTORY); + if (rc) + return rc; + + skb = digital_skb_alloc(ddev, sizeof(*req)); + if (!skb) + return -ENOMEM; + + skb_put(skb, sizeof(*req) - sizeof(req->mask)); /* No mask */ + req = (struct digital_iso15693_inv_req *)skb->data; + + /* Single sub-carrier, high data rate, no AFI, single slot + * Inventory command + */ + req->flags = DIGITAL_ISO15693_REQ_FLAG_DATA_RATE | + DIGITAL_ISO15693_REQ_FLAG_INVENTORY | + DIGITAL_ISO15693_REQ_FLAG_NB_SLOTS; + req->cmd = DIGITAL_CMD_ISO15693_INVENTORY_REQ; + req->mask_len = 0; + + rc = digital_in_send_cmd(ddev, skb, 30, + digital_in_recv_iso15693_inv_res, NULL); + if (rc) + kfree_skb(skb); + + return rc; +} + static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev) { struct sk_buff *skb; @@ -634,6 +1091,18 @@ exit: dev_kfree_skb(resp); } +static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev, + void *arg, struct sk_buff *resp) +{ + if (!IS_ERR(resp) && (resp->len >= 2) && + (resp->data[1] == DIGITAL_CMD_SENSF_REQ)) + digital_tg_recv_sensf_req(ddev, arg, resp); + else + digital_tg_recv_atr_req(ddev, arg, resp); + + return; +} + static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, struct digital_sensf_req *sensf_req) { @@ -644,7 +1113,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, size = sizeof(struct digital_sensf_res); - if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE) + if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE) size -= sizeof(sensf_res->rd); skb = digital_skb_alloc(ddev, size); @@ -679,7 +1148,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, digital_skb_add_crc_f(skb); rc = digital_tg_send_cmd(ddev, skb, 300, - digital_tg_recv_atr_req, NULL); + digital_tg_recv_atr_or_sensf_req, NULL); if (rc) kfree_skb(skb); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index a9f4d2e62d8..677d24bb70f 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -26,6 +26,8 @@ #include "hci.h" +#define MAX_FWI 4949 + static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, const u8 *param, size_t param_len, data_exchange_cb_t cb, void *cb_context) @@ -37,7 +39,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, * for all commands? */ return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd, - param, param_len, cb, cb_context, 3000); + param, param_len, cb, cb_context, MAX_FWI); } /* @@ -82,7 +84,7 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, NFC_HCI_HCP_COMMAND, cmd, param, param_len, nfc_hci_execute_cb, &hcp_ew, - 3000); + MAX_FWI); if (hcp_ew.exec_result < 0) return hcp_ew.exec_result; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d45b638e77c..47403705197 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -225,7 +225,7 @@ int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate) goto exit; } - targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data); + targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data); targets->sel_res = sak_skb->data[0]; r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE, @@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto disconnect_all; - if (skb->len && skb->len == strlen(hdev->init_data.session_id)) - if (memcmp(hdev->init_data.session_id, skb->data, - skb->len) == 0) { - /* TODO ELa: restore gate<->pipe table from - * some TBD location. - * note: it doesn't seem possible to get the chip - * currently open gate/pipe table. - * It is only possible to obtain the supported - * gate list. - */ + if (skb->len && skb->len == strlen(hdev->init_data.session_id) && + (memcmp(hdev->init_data.session_id, skb->data, + skb->len) == 0) && hdev->ops->load_session) { + /* Restore gate<->pipe table from some proprietary location. */ - /* goto exit - * For now, always do a full initialization */ - } + r = hdev->ops->load_session(hdev); - r = nfc_hci_disconnect_all_gates(hdev); - if (r < 0) - goto exit; + if (r < 0) + goto disconnect_all; + } else { - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, - hdev->init_data.gates); - if (r < 0) - goto disconnect_all; + r = nfc_hci_disconnect_all_gates(hdev); + if (r < 0) + goto exit; - r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, - NFC_HCI_ADMIN_SESSION_IDENTITY, - hdev->init_data.session_id, - strlen(hdev->init_data.session_id)); + r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, + hdev->init_data.gates); + if (r < 0) + goto disconnect_all; + + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_SESSION_IDENTITY, + hdev->init_data.session_id, + strlen(hdev->init_data.session_id)); + } if (r == 0) goto exit; diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index a07d2b81848..1b90c053185 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -20,14 +20,12 @@ #include "llc.h" -static struct list_head llc_engines; +static LIST_HEAD(llc_engines); int nfc_llc_init(void) { int r; - INIT_LIST_HEAD(&llc_engines); - r = nfc_llc_nop_register(); if (r) goto exit; diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index bec6ed15f50..a3ad69a4c64 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 6184bd1fba3..f6278da6876 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -27,7 +27,7 @@ static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; -static struct list_head llcp_devices; +static LIST_HEAD(llcp_devices); static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); @@ -293,9 +293,9 @@ static void nfc_llcp_sdreq_timer(unsigned long data) struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { - struct nfc_llcp_local *local, *n; + struct nfc_llcp_local *local; - list_for_each_entry_safe(local, n, &llcp_devices, list) + list_for_each_entry(local, &llcp_devices, list) if (local->dev == dev) return local; @@ -609,14 +609,16 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) { - struct nfc_llcp_local *local = nfc_llcp_find_local(dev); + struct nfc_llcp_local *local; + + if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN) + return -EINVAL; + local = nfc_llcp_find_local(dev); if (local == NULL) { pr_err("No LLCP device\n"); return -ENODEV; } - if (gb_len < 3) - return -EINVAL; memset(local->remote_gb, 0, NFC_MAX_GT_LEN); memcpy(local->remote_gb, gb, gb_len); @@ -678,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, + skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, GFP_ATOMIC); if (skb_copy == NULL) continue; - data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); data[0] = local->dev ? local->dev->idx : 0xFF; - data[1] = direction; + data[1] = direction & 0x01; + data[1] |= (RAW_PAYLOAD_LLCP << 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -745,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, - NFC_LLCP_DIRECTION_TX); + NFC_DIRECTION_TX); ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -974,7 +977,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, new_sk->sk_state = LLCP_CONNECTED; /* Wake the listening processes */ - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); /* Send CC */ nfc_llcp_send_cc(new_sock); @@ -1474,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX); nfc_llcp_rx_skb(local, skb); @@ -1622,8 +1625,6 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) int __init nfc_llcp_init(void) { - INIT_LIST_HEAD(&llcp_devices); - return nfc_llcp_sock_init(); } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 46bda010bf1..2b400e1a869 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -74,7 +74,7 @@ static int __nci_request(struct nci_dev *ndev, ndev->req_status = NCI_REQ_PEND; - init_completion(&ndev->req_completion); + reinit_completion(&ndev->req_completion); req(ndev, opt); completion_rc = wait_for_completion_interruptible_timeout(&ndev->req_completion, @@ -301,7 +301,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = __nci_request(ndev, nci_reset_req, 0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); - if (ndev->ops->setup(ndev)) + if (ndev->ops->setup) ndev->ops->setup(ndev); if (!rc) { @@ -709,6 +709,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->ops = ops; ndev->tx_headroom = tx_headroom; ndev->tx_tailroom = tx_tailroom; + init_completion(&ndev->req_completion); ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, @@ -860,6 +861,10 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); + return ndev->ops->send(ndev, skb); } @@ -934,6 +939,11 @@ static void nci_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = skb_dequeue(&ndev->rx_q))) { + + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 1e905097456..f8f6af23138 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -366,7 +366,6 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf, __u8 *data) { struct activation_params_poll_nfc_dep *poll; - int i; switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -374,10 +373,8 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, poll = &ntf->activation_params.poll_nfc_dep; poll->atr_res_len = min_t(__u8, *data++, 63); pr_debug("atr_res_len %d\n", poll->atr_res_len); - if (poll->atr_res_len > 0) { - for (i = 0; i < poll->atr_res_len; i++) - poll->atr_res[poll->atr_res_len-1-i] = data[i]; - } + if (poll->atr_res_len > 0) + memcpy(poll->atr_res, data, poll->atr_res_len); break; default: diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index f1d426f10cc..ec250e77763 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -105,7 +105,7 @@ int nci_spi_send(struct nci_spi *nspi, if (ret != 0 || nspi->acknowledge_mode == NCI_SPI_CRC_DISABLED) goto done; - init_completion(&nspi->req_completion); + reinit_completion(&nspi->req_completion); completion_rc = wait_for_completion_interruptible_timeout( &nspi->req_completion, NCI_SPI_SEND_TIMEOUT); @@ -145,6 +145,7 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi, nspi->spi = spi; nspi->ndev = ndev; + init_completion(&nspi->req_completion); return nspi; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ebbf6fb88b3..43cb1c17e26 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -94,6 +94,14 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, target->sensf_res)) goto nla_put_failure; + if (target->is_iso15693) { + if (nla_put_u8(msg, NFC_ATTR_TARGET_ISO15693_DSFID, + target->iso15693_dsfid) || + nla_put(msg, NFC_ATTR_TARGET_ISO15693_UID, + sizeof(target->iso15693_uid), target->iso15693_uid)) + goto nla_put_failure; + } + return genlmsg_end(msg, hdr); nla_put_failure: diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 9d6e74f7e6b..88d60064890 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -40,6 +40,12 @@ struct nfc_rawsock { struct work_struct tx_work; bool tx_work_scheduled; }; + +struct nfc_sock_list { + struct hlist_head head; + rwlock_t lock; +}; + #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index c27a6e86cae..8627c75063e 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -27,6 +27,24 @@ #include "nfc.h" +static struct nfc_sock_list raw_sk_list = { + .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock) +}; + +void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_add_node(sk, &l->head); + write_unlock(&l->lock); +} + +void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_del_node_init(sk); + write_unlock(&l->lock); +} + static void rawsock_write_queue_purge(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock) if (!sk) return 0; + if (sock->type == SOCK_RAW) + nfc_sock_unlink(&raw_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops rawsock_raw_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .release = rawsock_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = rawsock_recvmsg, + .mmap = sock_no_mmap, +}; + static void rawsock_destruct(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock, pr_debug("sock=%p\n", sock); - if (sock->type != SOCK_SEQPACKET) + if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - sock->ops = &rawsock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &rawsock_raw_ops; + else + sock->ops = &rawsock_ops; sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); if (!sk) @@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock, sk->sk_protocol = nfc_proto->id; sk->sk_destruct = rawsock_destruct; sock->state = SS_UNCONNECTED; - - INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); - nfc_rawsock(sk)->tx_work_scheduled = false; + if (sock->type == SOCK_RAW) + nfc_sock_link(&raw_sk_list, sk); + else { + INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); + nfc_rawsock(sk)->tx_work_scheduled = false; + } return 0; } +void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, + u8 payload_type, u8 direction) +{ + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&raw_sk_list.lock); + + sk_for_each(sk, &raw_sk_list.head) { + if (!skb_copy) { + skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC); + if (!skb_copy) + continue; + + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); + + data[0] = dev ? dev->idx : 0xFF; + data[1] = direction & 0x01; + data[1] |= (payload_type << 1); + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&raw_sk_list.lock); + + kfree_skb(skb_copy); +} +EXPORT_SYMBOL(nfc_send_to_raw_sock); + static struct proto rawsock_proto = { .name = "NFC_RAW", .owner = THIS_MODULE, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index df4692826ea..a3276e3c4fe 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,6 +55,7 @@ #include "datapath.h" #include "flow.h" +#include "flow_table.h" #include "flow_netlink.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -160,7 +161,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -256,10 +256,10 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) out: /* Update datapath statistics. */ - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); (*stats_counter)++; stats->n_mask_hit += n_mask_hit; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); } static struct genl_family dp_packet_genl_family = { @@ -295,9 +295,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, err: stats = this_cpu_ptr(dp->stats_percpu); - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); stats->n_lost++; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); return err; } @@ -464,12 +464,24 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); - skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; + + /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ + if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { + size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len; + + if (plen > 0) + memset(skb_put(user_skb, plen), 0, plen); + } ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); kfree_skb(nskb); return err; } @@ -598,9 +610,9 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->sync); + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; @@ -852,11 +864,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* The unmasked key has to be the same for flow updates. */ - error = -EINVAL; - if (!ovs_flow_cmp_unmasked_key(flow, &match)) { - OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); + if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); @@ -1079,6 +1088,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size(IFNAMSIZ); msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); + msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ return msgsize; } @@ -1168,7 +1178,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in struct datapath *dp; dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); - if (!dp) + if (IS_ERR(dp)) return; WARN(dp->user_features, "Dropping previously announced user features\n"); @@ -1209,18 +1219,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err) goto err_free_dp; - dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); if (!dp->stats_percpu) { err = -ENOMEM; goto err_destroy_table; } - for_each_possible_cpu(i) { - struct dp_stats_percpu *dpath_stats; - dpath_stats = per_cpu_ptr(dp->stats_percpu, i); - u64_stats_init(&dpath_stats->sync); - } - dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { @@ -1279,7 +1283,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(&dp->table); + ovs_flow_tbl_destroy(&dp->table, false); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); @@ -1306,10 +1310,13 @@ static void __dp_destroy(struct datapath *dp) list_del_rcu(&dp->list_node); /* OVSP_LOCAL is datapath internal port. We need to make sure that - * all port in datapath are destroyed first before freeing datapath. + * all ports in datapath are destroyed first before freeing datapath. */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); + /* RCU destroy the flow table */ + ovs_flow_tbl_destroy(&dp->table, true); + call_rcu(&dp->rcu, destroy_dp_rcu); } @@ -1753,11 +1760,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int bucket = cb->args[0], skip = cb->args[1]; int i, j = 0; + rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) + if (!dp) { + rcu_read_unlock(); return -ENODEV; - - rcu_read_lock(); + } for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 6be9fbb5e9c..05317380fc0 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -55,7 +55,7 @@ struct dp_stats_percpu { u64 n_missed; u64 n_lost; u64 n_mask_hit; - struct u64_stats_sync sync; + struct u64_stats_sync syncp; }; /** diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 16f4b46161d..2998989e76d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -73,6 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.frag != OVS_FRAG_TYPE_LATER && flow->key.ip.proto == IPPROTO_TCP && likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); @@ -91,7 +92,7 @@ static void stats_read(struct flow_stats *stats, unsigned long *used, __be16 *tcp_flags) { spin_lock(&stats->lock); - if (time_after(stats->used, *used)) + if (!*used || time_after(stats->used, *used)) *used = stats->used; *tcp_flags |= stats->tcp_flags; ovs_stats->n_packets += stats->packet_count; @@ -102,30 +103,24 @@ static void stats_read(struct flow_stats *stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu, cur_cpu; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); + local_bh_disable(); if (!flow->stats.is_percpu) { stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); } else { - cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { struct flow_stats *stats; - if (cpu == cur_cpu) - local_bh_disable(); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static void stats_reset(struct flow_stats *stats) @@ -140,25 +135,17 @@ static void stats_reset(struct flow_stats *stats) void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu, cur_cpu; + int cpu; + local_bh_disable(); if (!flow->stats.is_percpu) { stats_reset(flow->stats.stat); } else { - cur_cpu = get_cpu(); - for_each_possible_cpu(cpu) { - - if (cpu == cur_cpu) - local_bh_disable(); - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c58a0fe3c88..3c268b3d71c 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -153,29 +153,29 @@ static void rcu_free_flow_callback(struct rcu_head *rcu) flow_free(flow); } -static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) -{ - if (!mask) - return; - - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } -} - void ovs_flow_free(struct sw_flow *flow, bool deferred) { if (!flow) return; - flow_mask_del_ref(flow->mask, deferred); + if (flow->mask) { + struct sw_flow_mask *mask = flow->mask; + + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + kfree_rcu(mask, rcu); + else + kfree(mask); + } + } if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); @@ -188,26 +188,9 @@ static void free_buckets(struct flex_array *buckets) flex_array_free(buckets); } + static void __table_instance_destroy(struct table_instance *ti) { - int i; - - if (ti->keep_flows) - goto skip_flows; - - for (i = 0; i < ti->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head = flex_array_get(ti->buckets, i); - struct hlist_node *n; - int ver = ti->node_ver; - - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del(&flow->hash_node[ver]); - ovs_flow_free(flow, false); - } - } - -skip_flows: free_buckets(ti->buckets); kfree(ti); } @@ -258,20 +241,38 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) static void table_instance_destroy(struct table_instance *ti, bool deferred) { + int i; + if (!ti) return; + if (ti->keep_flows) + goto skip_flows; + + for (i = 0; i < ti->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(ti->buckets, i); + struct hlist_node *n; + int ver = ti->node_ver; + + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { + hlist_del_rcu(&flow->hash_node[ver]); + ovs_flow_free(flow, deferred); + } + } + +skip_flows: if (deferred) call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); else __table_instance_destroy(ti); } -void ovs_flow_tbl_destroy(struct flow_table *table) +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) { struct table_instance *ti = ovsl_dereference(table->ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, deferred); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -504,16 +505,11 @@ static struct sw_flow_mask *mask_alloc(void) mask = kmalloc(sizeof(*mask), GFP_KERNEL); if (mask) - mask->ref_count = 0; + mask->ref_count = 1; return mask; } -static void mask_add_ref(struct sw_flow_mask *mask) -{ - mask->ref_count++; -} - static bool mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { @@ -554,9 +550,11 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask->key = new->key; mask->range = new->range; list_add_rcu(&mask->list, &tbl->mask_list); + } else { + BUG_ON(!mask->ref_count); + mask->ref_count++; } - mask_add_ref(mask); flow->mask = mask; return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 1996e34c0fd..baaeb101924 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -60,7 +60,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); int ovs_flow_tbl_count(struct flow_table *table); -void ovs_flow_tbl_destroy(struct flow_table *table); +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); int ovs_flow_tbl_flush(struct flow_table *flow_table); int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index a3d6951602d..ebb6e244255 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - return iptunnel_xmit(rt, skb, fl.saddr, + return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, false); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 208dd9a26dd..42c0f4a0b78 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -121,7 +121,6 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; - int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -139,19 +138,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats); + vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *vport_stats; - vport_stats = per_cpu_ptr(vport->percpu_stats, i); - u64_stats_init(&vport_stats->syncp); - } - - spin_lock_init(&vport->stats_lock); return vport; @@ -285,9 +277,9 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) percpu_stats = per_cpu_ptr(vport->percpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->syncp); + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); stats->rx_bytes += local_stats.rx_bytes; stats->rx_packets += local_stats.rx_packets; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a2bb37506c..b85c67ccb79 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -243,40 +243,41 @@ static int packet_direct_xmit(struct sk_buff *skb) const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; + int ret = NETDEV_TX_BUSY; u16 queue_map; - int ret; if (unlikely(!netif_running(dev) || - !netif_carrier_ok(dev))) { - kfree_skb(skb); - return NET_XMIT_DROP; - } + !netif_carrier_ok(dev))) + goto drop; features = netif_skb_features(skb); if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) { - kfree_skb(skb); - return NET_XMIT_DROP; - } + __skb_linearize(skb)) + goto drop; queue_map = skb_get_queue_mapping(skb); txq = netdev_get_tx_queue(dev, queue_map); - __netif_tx_lock_bh(txq); - if (unlikely(netif_xmit_frozen_or_stopped(txq))) { - ret = NETDEV_TX_BUSY; - kfree_skb(skb); - goto out; + local_bh_disable(); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) { + ret = ops->ndo_start_xmit(skb, dev); + if (ret == NETDEV_TX_OK) + txq_trans_update(txq); } + HARD_TX_UNLOCK(dev, txq); - ret = ops->ndo_start_xmit(skb, dev); - if (likely(dev_xmit_complete(ret))) - txq_trans_update(txq); - else + local_bh_enable(); + + if (!dev_xmit_complete(ret)) kfree_skb(skb); -out: - __netif_tx_unlock_bh(txq); + return ret; +drop: + atomic_long_inc(&dev->tx_dropped); + kfree_skb(skb); + return NET_XMIT_DROP; } static struct net_device *packet_cached_dev_get(struct packet_sock *po) @@ -308,11 +309,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po) return po->xmit == packet_direct_xmit; } -static u16 packet_pick_tx_queue(struct net_device *dev) +static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; } +static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +{ + const struct net_device_ops *ops = dev->netdev_ops; + u16 queue_index; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb, NULL, + __packet_pick_tx_queue); + queue_index = netdev_cap_txqueue(dev, queue_index); + } else { + queue_index = __packet_pick_tx_queue(dev, skb); + } + + skb_set_queue_mapping(skb, queue_index); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -1261,7 +1278,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb->rxhash, num); + return reciprocal_scale(skb_get_hash(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1346,7 +1363,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -1832,7 +1848,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 0; drop_n_acct: @@ -2038,7 +2054,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, else prb_clear_blk_fill_status(&po->rx_ring); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -2053,7 +2069,7 @@ ring_is_full: po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); kfree_skb(copy_skb); goto drop_n_restore; } @@ -2241,8 +2257,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; - reserve = dev->hard_header_len; - + reserve = dev->hard_header_len + VLAN_HLEN; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2269,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr, hlen); + addr, hlen); + if (tp_len > dev->mtu + dev->hard_header_len) { + struct ethhdr *ehdr; + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) + tp_len = -EMSGSIZE; + } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2285,7 +2311,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + packet_pick_tx_queue(dev, skb); + skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); @@ -2499,7 +2526,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + + packet_pick_tx_queue(dev, skb); if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { @@ -3786,7 +3814,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, */ if (!tx_ring) init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); - break; + break; default: break; } diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index a2fba7edfd1..66dc65e7c6a 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -37,7 +37,7 @@ struct gprs_dev { struct sock *sk; void (*old_state_change)(struct sock *); - void (*old_data_ready)(struct sock *, int); + void (*old_data_ready)(struct sock *); void (*old_write_space)(struct sock *); struct net_device *dev; @@ -146,7 +146,7 @@ drop: return err; } -static void gprs_data_ready(struct sock *sk, int len) +static void gprs_data_ready(struct sock *sk) { struct gprs_dev *gp = sk->sk_user_data; struct sk_buff *skb; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e77411735de..70a547ea517 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -462,10 +462,9 @@ out: queue: skb->dev = NULL; skb_set_owner_r(skb, sk); - err = skb->len; skb_queue_tail(queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, err); + sk->sk_data_ready(sk); return NET_RX_SUCCESS; } @@ -587,10 +586,9 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) pn->rx_credits--; skb->dev = NULL; skb_set_owner_r(skb, sk); - err = skb->len; skb_queue_tail(&sk->sk_receive_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, err); + sk->sk_data_ready(sk); return NET_RX_SUCCESS; case PNS_PEP_CONNECT_RESP: @@ -698,7 +696,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) skb_queue_head(&sk->sk_receive_queue, skb); sk_acceptq_added(sk); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return NET_RX_SUCCESS; case PNS_PEP_DISCONNECT_REQ: diff --git a/net/rds/iw.c b/net/rds/iw.c index 7826d46baa7..589935661d6 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support IB devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_RNIC) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9cf2927d002..65637491f72 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -61,12 +61,12 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ int rds_tcp_listen_init(void); void rds_tcp_listen_stop(void); -void rds_tcp_listen_data_ready(struct sock *sk, int bytes); +void rds_tcp_listen_data_ready(struct sock *sk); /* tcp_recv.c */ int rds_tcp_recv_init(void); void rds_tcp_recv_exit(void); -void rds_tcp_data_ready(struct sock *sk, int bytes); +void rds_tcp_data_ready(struct sock *sk); int rds_tcp_recv(struct rds_connection *conn); void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 7787537e9c2..4e638f85118 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -108,9 +108,9 @@ static void rds_tcp_accept_worker(struct work_struct *work) cond_resched(); } -void rds_tcp_listen_data_ready(struct sock *sk, int bytes) +void rds_tcp_listen_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); rdsdebug("listen data ready sk %p\n", sk); @@ -132,7 +132,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) out: read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + ready(sk); } int rds_tcp_listen_init(void) diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 4fac4f2bb9d..9ae6e0a264e 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -314,13 +314,13 @@ int rds_tcp_recv(struct rds_connection *conn) return ret; } -void rds_tcp_data_ready(struct sock *sk, int bytes) +void rds_tcp_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); struct rds_connection *conn; struct rds_tcp_connection *tc; - rdsdebug("data ready sk %p bytes %d\n", sk, bytes); + rdsdebug("data ready sk %p\n", sk); read_lock(&sk->sk_callback_lock); conn = sk->sk_user_data; @@ -337,7 +337,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + ready(sk); } int rds_tcp_recv_init(void) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index bd2a5b90400..14c98e48f26 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -36,8 +36,6 @@ struct rfkill_gpio_data { struct gpio_desc *shutdown_gpio; struct rfkill *rfkill_dev; - char *reset_name; - char *shutdown_name; struct clk *clk; bool clk_enabled; @@ -47,17 +45,14 @@ static int rfkill_gpio_set_power(void *data, bool blocked) { struct rfkill_gpio_data *rfkill = data; - if (blocked) { - gpiod_set_value(rfkill->shutdown_gpio, 0); - gpiod_set_value(rfkill->reset_gpio, 0); - if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled) - clk_disable(rfkill->clk); - } else { - if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled) - clk_enable(rfkill->clk); - gpiod_set_value(rfkill->reset_gpio, 1); - gpiod_set_value(rfkill->shutdown_gpio, 1); - } + if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) + clk_enable(rfkill->clk); + + gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked); + gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked); + + if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled) + clk_disable(rfkill->clk); rfkill->clk_enabled = blocked; @@ -87,10 +82,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev) { struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; struct rfkill_gpio_data *rfkill; - const char *clk_name = NULL; struct gpio_desc *gpio; int ret; - int len; rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL); if (!rfkill) @@ -101,28 +94,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (ret) return ret; } else if (pdata) { - clk_name = pdata->power_clk_name; rfkill->name = pdata->name; rfkill->type = pdata->type; } else { return -ENODEV; } - len = strlen(rfkill->name); - rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL); - if (!rfkill->reset_name) - return -ENOMEM; - - rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL); - if (!rfkill->shutdown_name) - return -ENOMEM; + rfkill->clk = devm_clk_get(&pdev->dev, NULL); - snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name); - snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name); - - rfkill->clk = devm_clk_get(&pdev->dev, clk_name); - - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0); + gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -130,7 +110,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->reset_gpio = gpio; } - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1); + gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -146,14 +126,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (pdata && pdata->gpio_runtime_setup) { - ret = pdata->gpio_runtime_setup(pdev); - if (ret) { - dev_err(&pdev->dev, "can't set up gpio\n"); - return ret; - } - } - rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); @@ -174,20 +146,23 @@ static int rfkill_gpio_probe(struct platform_device *pdev) static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); - struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; - if (pdata && pdata->gpio_runtime_close) - pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); return 0; } +#ifdef CONFIG_ACPI static const struct acpi_device_id rfkill_acpi_match[] = { + { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, + { "LNV4752", RFKILL_TYPE_GPS }, { }, }; +#endif static struct platform_driver rfkill_gpio_driver = { .probe = rfkill_gpio_probe, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c2cca2ee6ae..8451c8cdc9d 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1041,7 +1041,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros rose_start_heartbeat(make); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 1; } diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index d1c3429b69e..ec126f91276 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -20,9 +20,8 @@ af-rxrpc-y := \ ar-skbuff.o \ ar-transport.o -ifeq ($(CONFIG_PROC_FS),y) -af-rxrpc-y += ar-proc.o -endif +af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o +af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index e61aa6001c6..7b167048963 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void) goto error_key_type_s; } + ret = rxrpc_sysctl_init(); + if (ret < 0) { + printk(KERN_CRIT "RxRPC: Cannot register sysctls\n"); + goto error_sysctls; + } + #ifdef CONFIG_PROC_FS proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); proc_create("rxrpc_conns", 0, init_net.proc_net, @@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void) #endif return 0; +error_sysctls: + unregister_key_type(&key_type_rxrpc_s); error_key_type_s: unregister_key_type(&key_type_rxrpc); error_key_type: @@ -865,6 +873,7 @@ error_call_jar: static void __exit af_rxrpc_exit(void) { _enter(""); + rxrpc_sysctl_exit(); unregister_key_type(&key_type_rxrpc_s); unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index cd97a0ce48d..c6be17a959a 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -19,7 +19,49 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -static unsigned int rxrpc_ack_defer = 1; +/* + * How long to wait before scheduling ACK generation after seeing a + * packet with RXRPC_REQUEST_ACK set (in jiffies). + */ +unsigned rxrpc_requested_ack_delay = 1; + +/* + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * + * We use this when we've received new data packets. If those packets aren't + * all consumed within this time we will send a DELAY ACK if an ACK was not + * requested to let the sender know it doesn't need to resend. + */ +unsigned rxrpc_soft_ack_delay = 1 * HZ; + +/* + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * + * We use this when we've consumed some previously soft-ACK'd packets when + * further packets aren't immediately received to decide when to send an IDLE + * ACK let the other end know that it can free up its Tx buffer space. + */ +unsigned rxrpc_idle_ack_delay = 0.5 * HZ; + +/* + * Receive window size in packets. This indicates the maximum number of + * unconsumed received packets we're willing to retain in memory. Once this + * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further + * packets. + */ +unsigned rxrpc_rx_window_size = 32; + +/* + * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet + * made by gluing normal packets together that we're willing to handle. + */ +unsigned rxrpc_rx_mtu = 5692; + +/* + * The maximum number of fragments in a received jumbo packet that we tell the + * sender that we're willing to handle. + */ +unsigned rxrpc_rx_jumbo_max = 4; static const char *rxrpc_acks(u8 reason) { @@ -82,24 +124,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, switch (ack_reason) { case RXRPC_ACK_DELAY: _debug("run delay timer"); - call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ; - add_timer(&call->ack_timer); - return; + expiry = rxrpc_soft_ack_delay; + goto run_timer; case RXRPC_ACK_IDLE: if (!immediate) { _debug("run defer timer"); - expiry = 1; + expiry = rxrpc_idle_ack_delay; goto run_timer; } goto cancel_timer; case RXRPC_ACK_REQUESTED: - if (!rxrpc_ack_defer) + expiry = rxrpc_requested_ack_delay; + if (!expiry) goto cancel_timer; if (!immediate || serial == cpu_to_be32(1)) { _debug("run defer timer"); - expiry = rxrpc_ack_defer; goto run_timer; } @@ -1174,11 +1215,11 @@ send_ACK: mtu = call->conn->trans->peer->if_mtu; mtu -= call->conn->trans->peer->hdrsize; ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(32); + ackinfo.rwind = htonl(rxrpc_rx_window_size); /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(5692); - ackinfo.jumbo_max = htonl(4); + ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index a3bbb360a3f..a9e05db0f5d 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -12,10 +12,22 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/circ_buf.h> +#include <linux/hashtable.h> +#include <linux/spinlock_types.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" +/* + * Maximum lifetime of a call (in jiffies). + */ +unsigned rxrpc_max_call_lifetime = 60 * HZ; + +/* + * Time till dead call expires after last use (in jiffies). + */ +unsigned rxrpc_dead_call_expiry = 2 * HZ; + const char *const rxrpc_call_states[] = { [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", @@ -38,8 +50,6 @@ const char *const rxrpc_call_states[] = { struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static unsigned int rxrpc_call_max_lifetime = 60; -static unsigned int rxrpc_dead_call_timeout = 2; static void rxrpc_destroy_call(struct work_struct *work); static void rxrpc_call_life_expired(unsigned long _call); @@ -47,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); +static DEFINE_SPINLOCK(rxrpc_call_hash_lock); +static DEFINE_HASHTABLE(rxrpc_call_hash, 10); + +/* + * Hash function for rxrpc_call_hash + */ +static unsigned long rxrpc_call_hashfunc( + u8 clientflag, + __be32 cid, + __be32 call_id, + __be32 epoch, + __be16 service_id, + sa_family_t proto, + void *localptr, + unsigned int addr_size, + const u8 *peer_addr) +{ + const u16 *p; + unsigned int i; + unsigned long key; + u32 hcid = ntohl(cid); + + _enter(""); + + key = (unsigned long)localptr; + /* We just want to add up the __be32 values, so forcing the + * cast should be okay. + */ + key += (__force u32)epoch; + key += (__force u16)service_id; + key += (__force u32)call_id; + key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT; + key += hcid & RXRPC_CHANNELMASK; + key += clientflag; + key += proto; + /* Step through the peer address in 16-bit portions for speed */ + for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++) + key += *p; + _leave(" key = 0x%lx", key); + return key; +} + +/* + * Add a call to the hashtable + */ +static void rxrpc_call_hash_add(struct rxrpc_call *call) +{ + unsigned long key; + unsigned int addr_size = 0; + + _enter(""); + switch (call->proto) { + case AF_INET: + addr_size = sizeof(call->peer_ip.ipv4_addr); + break; + case AF_INET6: + addr_size = sizeof(call->peer_ip.ipv6_addr); + break; + default: + break; + } + key = rxrpc_call_hashfunc(call->in_clientflag, call->cid, + call->call_id, call->epoch, + call->service_id, call->proto, + call->conn->trans->local, addr_size, + call->peer_ip.ipv6_addr); + /* Store the full key in the call */ + call->hash_key = key; + spin_lock(&rxrpc_call_hash_lock); + hash_add_rcu(rxrpc_call_hash, &call->hash_node, key); + spin_unlock(&rxrpc_call_hash_lock); + _leave(""); +} + +/* + * Remove a call from the hashtable + */ +static void rxrpc_call_hash_del(struct rxrpc_call *call) +{ + _enter(""); + spin_lock(&rxrpc_call_hash_lock); + hash_del_rcu(&call->hash_node); + spin_unlock(&rxrpc_call_hash_lock); + _leave(""); +} + +/* + * Find a call in the hashtable and return it, or NULL if it + * isn't there. + */ +struct rxrpc_call *rxrpc_find_call_hash( + u8 clientflag, + __be32 cid, + __be32 call_id, + __be32 epoch, + __be16 service_id, + void *localptr, + sa_family_t proto, + const u8 *peer_addr) +{ + unsigned long key; + unsigned int addr_size = 0; + struct rxrpc_call *call = NULL; + struct rxrpc_call *ret = NULL; + + _enter(""); + switch (proto) { + case AF_INET: + addr_size = sizeof(call->peer_ip.ipv4_addr); + break; + case AF_INET6: + addr_size = sizeof(call->peer_ip.ipv6_addr); + break; + default: + break; + } + + key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch, + service_id, proto, localptr, addr_size, + peer_addr); + hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) { + if (call->hash_key == key && + call->call_id == call_id && + call->cid == cid && + call->in_clientflag == clientflag && + call->service_id == service_id && + call->proto == proto && + call->local == localptr && + memcmp(call->peer_ip.ipv6_addr, peer_addr, + addr_size) == 0 && + call->epoch == epoch) { + ret = call; + break; + } + } + _leave(" = %p", ret); + return ret; +} + /* * allocate a new call */ @@ -91,7 +240,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->rx_data_expect = 1; call->rx_data_eaten = 0; call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS; + call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; call->creation_jif = jiffies; return call; } @@ -128,11 +277,31 @@ static struct rxrpc_call *rxrpc_alloc_client_call( return ERR_PTR(ret); } + /* Record copies of information for hashtable lookup */ + call->proto = rx->proto; + call->local = trans->local; + switch (call->proto) { + case AF_INET: + call->peer_ip.ipv4_addr = + trans->peer->srx.transport.sin.sin_addr.s_addr; + break; + case AF_INET6: + memcpy(call->peer_ip.ipv6_addr, + trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8, + sizeof(call->peer_ip.ipv6_addr)); + break; + } + call->epoch = call->conn->epoch; + call->service_id = call->conn->service_id; + call->in_clientflag = call->conn->in_clientflag; + /* Add the new call to the hashtable */ + rxrpc_call_hash_add(call); + spin_lock(&call->conn->trans->peer->lock); list_add(&call->error_link, &call->conn->trans->peer->error_targets); spin_unlock(&call->conn->trans->peer->lock); - call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ; + call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; add_timer(&call->lifetimer); _leave(" = %p", call); @@ -320,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, parent = *p; call = rb_entry(parent, struct rxrpc_call, conn_node); - if (call_id < call->call_id) + /* The tree is sorted in order of the __be32 value without + * turning it into host order. + */ + if ((__force u32)call_id < (__force u32)call->call_id) p = &(*p)->rb_left; - else if (call_id > call->call_id) + else if ((__force u32)call_id > (__force u32)call->call_id) p = &(*p)->rb_right; else goto old_call; @@ -347,9 +519,31 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock_bh(&rxrpc_call_lock); + /* Record copies of information for hashtable lookup */ + call->proto = rx->proto; + call->local = conn->trans->local; + switch (call->proto) { + case AF_INET: + call->peer_ip.ipv4_addr = + conn->trans->peer->srx.transport.sin.sin_addr.s_addr; + break; + case AF_INET6: + memcpy(call->peer_ip.ipv6_addr, + conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8, + sizeof(call->peer_ip.ipv6_addr)); + break; + default: + break; + } + call->epoch = conn->epoch; + call->service_id = conn->service_id; + call->in_clientflag = conn->in_clientflag; + /* Add the new call to the hashtable */ + rxrpc_call_hash_add(call); + _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ; + call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; add_timer(&call->lifetimer); _leave(" = %p {%d} [new]", call, call->debug_id); return call; @@ -533,7 +727,7 @@ void rxrpc_release_call(struct rxrpc_call *call) del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ; + call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; add_timer(&call->deadspan); _leave(""); @@ -665,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_put_connection(call->conn); } + /* Remove the call from the hash */ + rxrpc_call_hash_del(call); + if (call->acks_window) { _debug("kill Tx window %d", CIRC_CNT(call->acks_head, call->acks_tail, diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 7bf5b5b9e8b..6631f4f1e39 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -18,11 +18,15 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" +/* + * Time till a connection expires after last use (in seconds). + */ +unsigned rxrpc_connection_expiry = 10 * 60; + static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); DEFINE_RWLOCK(rxrpc_connection_lock); -static unsigned long rxrpc_connection_timeout = 10 * 60; static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); /* @@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) spin_lock(&conn->trans->client_lock); write_lock(&conn->trans->conn_lock); - reap_time = conn->put_time + rxrpc_connection_timeout; + reap_time = conn->put_time + rxrpc_connection_expiry; if (atomic_read(&conn->usage) > 0) { ; @@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void) { _enter(""); - rxrpc_connection_timeout = 0; + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index a9206087b4d..db57458c824 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk) if (mtu == 0) { /* they didn't give us a size, estimate one */ + mtu = peer->if_mtu; if (mtu > 1500) { mtu >>= 1; if (mtu < 1500) diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 529572f18d1..63b21e580de 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -25,8 +25,6 @@ #include <net/net_namespace.h> #include "ar-internal.h" -unsigned long rxrpc_ack_timeout = 1; - const char *rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", @@ -115,7 +113,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, spin_unlock_bh(&sk->sk_receive_queue.lock); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb_len); + sk->sk_data_ready(sk); } skb = NULL; } else { @@ -349,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) * it */ if (sp->hdr.flags & RXRPC_REQUEST_ACK) { _proto("ACK Requested on %%%u", serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, - !(sp->hdr.flags & RXRPC_MORE_PACKETS)); + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); } switch (sp->hdr.type) { @@ -526,36 +523,38 @@ protocol_error: * post an incoming packet to the appropriate call/socket to deal with * - must get rid of the sk_buff, either by freeing it or by queuing it */ -static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, +static void rxrpc_post_packet_to_call(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp; - struct rxrpc_call *call; - struct rb_node *p; - __be32 call_id; - - _enter("%p,%p", conn, skb); - read_lock_bh(&conn->lock); + _enter("%p,%p", call, skb); sp = rxrpc_skb(skb); - /* look at extant calls by channel number first */ - call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK]; - if (!call || call->call_id != sp->hdr.callNumber) - goto call_not_extant; - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); read_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) + if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) { rxrpc_queue_call(call); + goto free_unlock; + } case RXRPC_CALL_REMOTELY_ABORTED: case RXRPC_CALL_NETWORK_ERROR: case RXRPC_CALL_DEAD: + goto dead_call; + case RXRPC_CALL_COMPLETE: + case RXRPC_CALL_CLIENT_FINAL_ACK: + /* complete server call */ + if (call->conn->in_clientflag) + goto dead_call; + /* resend last packet of a completed call */ + _debug("final ack again"); + rxrpc_get_call(call); + set_bit(RXRPC_CALL_ACK_FINAL, &call->events); + rxrpc_queue_call(call); goto free_unlock; default: break; @@ -563,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, read_unlock(&call->state_lock); rxrpc_get_call(call); - read_unlock_bh(&conn->lock); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -574,78 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, rxrpc_put_call(call); goto done; -call_not_extant: - /* search the completed calls in case what we're dealing with is - * there */ - _debug("call not extant"); - - call_id = sp->hdr.callNumber; - p = conn->calls.rb_node; - while (p) { - call = rb_entry(p, struct rxrpc_call, conn_node); - - if (call_id < call->call_id) - p = p->rb_left; - else if (call_id > call->call_id) - p = p->rb_right; - else - goto found_completed_call; - } - dead_call: - /* it's a either a really old call that we no longer remember or its a - * new incoming call */ - read_unlock_bh(&conn->lock); - - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.seq == cpu_to_be32(1)) { - _debug("incoming call"); - skb_queue_tail(&conn->trans->local->accept_queue, skb); - rxrpc_queue_work(&conn->trans->local->acceptor); - goto done; - } - - _debug("dead call"); - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->trans->local, skb); - goto done; - - /* resend last packet of a completed call - * - client calls may have been aborted or ACK'd - * - server calls may have been aborted - */ -found_completed_call: - _debug("completed call"); - - if (atomic_read(&call->usage) == 0) - goto dead_call; - - /* synchronise any state changes */ - read_lock(&call->state_lock); - ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK, - call->state, >=, RXRPC_CALL_COMPLETE); - - if (call->state == RXRPC_CALL_LOCALLY_ABORTED || - call->state == RXRPC_CALL_REMOTELY_ABORTED || - call->state == RXRPC_CALL_DEAD) { - read_unlock(&call->state_lock); - goto dead_call; - } - - if (call->conn->in_clientflag) { - read_unlock(&call->state_lock); - goto dead_call; /* complete server call */ + if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { + skb->priority = RX_CALL_DEAD; + rxrpc_reject_packet(call->conn->trans->local, skb); + goto unlock; } - - _debug("final ack again"); - rxrpc_get_call(call); - set_bit(RXRPC_CALL_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - free_unlock: - read_unlock(&call->state_lock); - read_unlock_bh(&conn->lock); rxrpc_free_skb(skb); +unlock: + read_unlock(&call->state_lock); done: _leave(""); } @@ -664,21 +600,46 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, rxrpc_queue_conn(conn); } +static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local, + struct sk_buff *skb, + struct rxrpc_skb_priv *sp) +{ + struct rxrpc_peer *peer; + struct rxrpc_transport *trans; + struct rxrpc_connection *conn; + + peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, + udp_hdr(skb)->source); + if (IS_ERR(peer)) + goto cant_find_conn; + + trans = rxrpc_find_transport(local, peer); + rxrpc_put_peer(peer); + if (!trans) + goto cant_find_conn; + + conn = rxrpc_find_connection(trans, &sp->hdr); + rxrpc_put_transport(trans); + if (!conn) + goto cant_find_conn; + + return conn; +cant_find_conn: + return NULL; +} + /* * handle data received on the local endpoint * - may be called in interrupt context */ -void rxrpc_data_ready(struct sock *sk, int count) +void rxrpc_data_ready(struct sock *sk) { - struct rxrpc_connection *conn; - struct rxrpc_transport *trans; struct rxrpc_skb_priv *sp; struct rxrpc_local *local; - struct rxrpc_peer *peer; struct sk_buff *skb; int ret; - _enter("%p, %d", sk, count); + _enter("%p", sk); ASSERT(!irqs_disabled()); @@ -749,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count) (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) goto bad_message; - peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source); - if (IS_ERR(peer)) - goto cant_route_call; + if (sp->hdr.callNumber == 0) { + /* This is a connection-level packet. These should be + * fairly rare, so the extra overhead of looking them up the + * old-fashioned way doesn't really hurt */ + struct rxrpc_connection *conn; - trans = rxrpc_find_transport(local, peer); - rxrpc_put_peer(peer); - if (!trans) - goto cant_route_call; + conn = rxrpc_conn_from_local(local, skb, sp); + if (!conn) + goto cant_route_call; - conn = rxrpc_find_connection(trans, &sp->hdr); - rxrpc_put_transport(trans); - if (!conn) - goto cant_route_call; - - _debug("CONN %p {%d}", conn, conn->debug_id); - - if (sp->hdr.callNumber == 0) + _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - else - rxrpc_post_packet_to_call(conn, skb); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn); + } else { + struct rxrpc_call *call; + u8 in_clientflag = 0; + + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + in_clientflag = RXRPC_CLIENT_INITIATED; + call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid, + sp->hdr.callNumber, sp->hdr.epoch, + sp->hdr.serviceId, local, AF_INET, + (u8 *)&ip_hdr(skb)->saddr); + if (call) + rxrpc_post_packet_to_call(call, skb); + else + goto cant_route_call; + } rxrpc_put_local(local); return; @@ -790,8 +758,10 @@ cant_route_call: skb->priority = RX_CALL_DEAD; } - _debug("reject"); - rxrpc_reject_packet(local, skb); + if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { + _debug("reject type %d",sp->hdr.type); + rxrpc_reject_packet(local, skb); + } rxrpc_put_local(local); _leave(" [no call]"); return; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5f43675ee1d..ba9fd36d3f1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -396,9 +396,20 @@ struct rxrpc_call { #define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + struct hlist_node hash_node; + unsigned long hash_key; /* Full hash key */ + u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */ + struct rxrpc_local *local; /* Local endpoint. Used for hashing. */ + sa_family_t proto; /* Frame protocol */ /* the following should all be in net order */ __be32 cid; /* connection ID + channel index */ __be32 call_id; /* call ID on connection */ + __be32 epoch; /* epoch of this connection */ + __be16 service_id; /* service ID */ + union { /* Peer IP address for hashing */ + __be32 ipv4_addr; + __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */ + } peer_ip; }; /* @@ -433,6 +444,13 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ +extern unsigned rxrpc_requested_ack_delay; +extern unsigned rxrpc_soft_ack_delay; +extern unsigned rxrpc_idle_ack_delay; +extern unsigned rxrpc_rx_window_size; +extern unsigned rxrpc_rx_mtu; +extern unsigned rxrpc_rx_jumbo_max; + void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); void rxrpc_process_call(struct work_struct *); @@ -440,10 +458,14 @@ void rxrpc_process_call(struct work_struct *); /* * ar-call.c */ +extern unsigned rxrpc_max_call_lifetime; +extern unsigned rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; +struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32, + __be16, void *, sa_family_t, const u8 *); struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, struct rxrpc_transport *, struct rxrpc_conn_bundle *, @@ -460,6 +482,7 @@ void __exit rxrpc_destroy_all_calls(void); /* * ar-connection.c */ +extern unsigned rxrpc_connection_expiry; extern struct list_head rxrpc_connections; extern rwlock_t rxrpc_connection_lock; @@ -493,10 +516,9 @@ void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c */ -extern unsigned long rxrpc_ack_timeout; extern const char *rxrpc_pkts[]; -void rxrpc_data_ready(struct sock *, int); +void rxrpc_data_ready(struct sock *); int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); @@ -504,6 +526,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); * ar-local.c */ extern rwlock_t rxrpc_local_lock; + struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); void rxrpc_put_local(struct rxrpc_local *); void __exit rxrpc_destroy_all_locals(void); @@ -522,7 +545,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, /* * ar-output.c */ -extern int rxrpc_resend_timeout; +extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, @@ -572,6 +595,8 @@ void rxrpc_packet_destructor(struct sk_buff *); /* * ar-transport.c */ +extern unsigned rxrpc_transport_expiry; + struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, struct rxrpc_peer *, gfp_t); void rxrpc_put_transport(struct rxrpc_transport *); @@ -580,6 +605,17 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, struct rxrpc_peer *); /* + * sysctl.c + */ +#ifdef CONFIG_SYSCTL +extern int __init rxrpc_sysctl_init(void); +extern void rxrpc_sysctl_exit(void); +#else +static inline int __init rxrpc_sysctl_init(void) { return 0; } +static inline void rxrpc_sysctl_exit(void) {} +#endif + +/* * debug tracing */ extern unsigned int rxrpc_debug; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index d0e8f1c1898..0b4b9a79f5a 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -18,7 +18,10 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -int rxrpc_resend_timeout = 4; +/* + * Time till packet resend (in jiffies). + */ +unsigned rxrpc_resend_timeout = 4 * HZ; static int rxrpc_send_data(struct kiocb *iocb, struct rxrpc_sock *rx, @@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout * HZ; + sp->resend_at = jiffies + rxrpc_resend_timeout; if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { _debug("run timer"); call->resend_timer.expires = sp->resend_at; @@ -666,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb, /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (segment == 0 && !more)) { struct rxrpc_connection *conn = call->conn; + uint32_t seq; size_t pad; /* pad out if we're using security */ @@ -678,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb, memset(skb_put(skb, pad), 0, pad); } + seq = atomic_inc_return(&call->sequence); + sp->hdr.epoch = conn->epoch; sp->hdr.cid = call->cid; sp->hdr.callNumber = call->call_id; - sp->hdr.seq = - htonl(atomic_inc_return(&call->sequence)); + sp->hdr.seq = htonl(seq); sp->hdr.serial = htonl(atomic_inc_return(&conn->serial)); sp->hdr.type = RXRPC_PACKET_TYPE_DATA; @@ -697,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb, else if (CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz) > 1) sp->hdr.flags |= RXRPC_MORE_PACKETS; + if (more && seq & 1) + sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = rxrpc_secure_packet( call, skb, skb->mark, diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 34b5490dde6..e9aaa65c077 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (copy > len - copied) copy = len - copied; - if (skb->ip_summed == CHECKSUM_UNNECESSARY || - skb->ip_summed == CHECKSUM_PARTIAL) { - ret = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, copy); - } else { - ret = skb_copy_and_csum_datagram_iovec(skb, offset, - msg->msg_iov); - if (ret == -EINVAL) - goto csum_copy_error; - } + ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy); if (ret < 0) goto copy_error; @@ -348,20 +339,6 @@ copy_error: _leave(" = %d", ret); return ret; -csum_copy_error: - _debug("csum error"); - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call); - rxrpc_kill_skb(skb); - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - } - skb_kill_datagram(&rx->sk, skb, flags); - rxrpc_put_call(call); - return -EAGAIN; - wait_interrupted: ret = sock_intr_errno(timeo); wait_error: diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c index de755e04d29..4cfab49e329 100644 --- a/net/rxrpc/ar-skbuff.c +++ b/net/rxrpc/ar-skbuff.c @@ -83,9 +83,14 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, rxrpc_request_final_ACK(call); } else if (atomic_dec_and_test(&call->ackr_not_idle) && test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { + /* We previously soft-ACK'd some received packets that have now + * been consumed, so send a hard-ACK if no more packets are + * immediately forthcoming to allow the transmitter to free up + * its Tx bufferage. + */ _debug("send Rx idle ACK"); __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - true); + false); } spin_unlock_bh(&call->lock); diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 92df566930b..1976dec84f2 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -17,11 +17,15 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" +/* + * Time after last use at which transport record is cleaned up. + */ +unsigned rxrpc_transport_expiry = 3600 * 24; + static void rxrpc_transport_reaper(struct work_struct *work); static LIST_HEAD(rxrpc_transports); static DEFINE_RWLOCK(rxrpc_transport_lock); -static unsigned long rxrpc_transport_timeout = 3600 * 24; static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper); /* @@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) if (likely(atomic_read(&trans->usage) > 0)) continue; - reap_time = trans->put_time + rxrpc_transport_timeout; + reap_time = trans->put_time + rxrpc_transport_expiry; if (reap_time <= now) list_move_tail(&trans->link, &graveyard); else if (reap_time < earliest) @@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void) { _enter(""); - rxrpc_transport_timeout = 0; + rxrpc_transport_expiry = 0; cancel_delayed_work(&rxrpc_transport_reap); rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c new file mode 100644 index 00000000000..50a98a910eb --- /dev/null +++ b/net/rxrpc/sysctl.c @@ -0,0 +1,146 @@ +/* sysctls for configuring RxRPC operating parameters + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/sysctl.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static struct ctl_table_header *rxrpc_sysctl_reg_table; +static const unsigned zero = 0; +static const unsigned one = 1; +static const unsigned four = 4; +static const unsigned n_65535 = 65535; +static const unsigned n_max_acks = RXRPC_MAXACKS; + +/* + * RxRPC operating parameters. + * + * See Documentation/networking/rxrpc.txt and the variable definitions for more + * information on the individual parameters. + */ +static struct ctl_table rxrpc_sysctl_table[] = { + /* Values measured in milliseconds */ + { + .procname = "req_ack_delay", + .data = &rxrpc_requested_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&zero, + }, + { + .procname = "soft_ack_delay", + .data = &rxrpc_soft_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_ack_delay", + .data = &rxrpc_idle_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "resend_timeout", + .data = &rxrpc_resend_timeout, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + + /* Values measured in seconds but used in jiffies */ + { + .procname = "max_call_lifetime", + .data = &rxrpc_max_call_lifetime, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "dead_call_expiry", + .data = &rxrpc_dead_call_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + .extra1 = (void *)&one, + }, + + /* Values measured in seconds */ + { + .procname = "connection_expiry", + .data = &rxrpc_connection_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + }, + { + .procname = "transport_expiry", + .data = &rxrpc_transport_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + }, + + /* Non-time values */ + { + .procname = "rx_window_size", + .data = &rxrpc_rx_window_size, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&n_max_acks, + }, + { + .procname = "rx_mtu", + .data = &rxrpc_rx_mtu, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra1 = (void *)&n_65535, + }, + { + .procname = "rx_jumbo_max", + .data = &rxrpc_rx_jumbo_max, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&four, + }, + + { } +}; + +int __init rxrpc_sysctl_init(void) +{ + rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc", + rxrpc_sysctl_table); + if (!rxrpc_sysctl_reg_table) + return -ENOMEM; + return 0; +} + +void rxrpc_sysctl_exit(void) +{ + if (rxrpc_sysctl_reg_table) + unregister_net_sysctl_table(rxrpc_sysctl_reg_table); +} diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 72bdc716634..8a5ba5add4b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -27,8 +27,11 @@ #include <net/act_api.h> #include <net/netlink.h> -void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) +void tcf_hash_destroy(struct tc_action *a) { + struct tcf_common *p = a->priv; + struct tcf_hashinfo *hinfo = a->ops->hinfo; + spin_lock_bh(&hinfo->lock); hlist_del(&p->tcfc_head); spin_unlock_bh(&hinfo->lock); @@ -42,18 +45,22 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tcf_common *p, int bind, - struct tcf_hashinfo *hinfo) +int tcf_hash_release(struct tc_action *a, int bind) { + struct tcf_common *p = a->priv; int ret = 0; if (p) { if (bind) p->tcfc_bindcnt--; + else if (p->tcfc_bindcnt > 0) + return -EPERM; p->tcfc_refcnt--; if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { - tcf_hash_destroy(p, hinfo); + if (a->ops->cleanup) + a->ops->cleanup(a, bind); + tcf_hash_destroy(a); ret = 1; } } @@ -118,6 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) struct tcf_common *p; struct nlattr *nest; int i = 0, n_i = 0; + int ret = -EINVAL; nest = nla_nest_start(skb, a->order); if (nest == NULL) @@ -127,10 +135,13 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) for (i = 0; i < (hinfo->hmask + 1); i++) { head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) { + a->priv = p; + ret = tcf_hash_release(a, 0); + if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; - } + } else if (ret < 0) + goto nla_put_failure; } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -140,7 +151,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) return n_i; nla_put_failure: nla_nest_cancel(skb, nest); - return -EINVAL; + return ret; } static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, @@ -198,7 +209,7 @@ int tcf_hash_search(struct tc_action *a, u32 index) } EXPORT_SYMBOL(tcf_hash_search); -struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind) +int tcf_hash_check(u32 index, struct tc_action *a, int bind) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = NULL; @@ -207,19 +218,30 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind) p->tcfc_bindcnt++; p->tcfc_refcnt++; a->priv = p; + return 1; } - return p; + return 0; } EXPORT_SYMBOL(tcf_hash_check); -struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, - struct tc_action *a, int size, int bind) +void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) +{ + struct tcf_common *pc = a->priv; + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); +} +EXPORT_SYMBOL(tcf_hash_cleanup); + +int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, + int size, int bind) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) - return ERR_PTR(-ENOMEM); + return -ENOMEM; p->tcfc_refcnt = 1; if (bind) p->tcfc_bindcnt = 1; @@ -234,17 +256,19 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, &p->tcfc_lock, est); if (err) { kfree(p); - return ERR_PTR(err); + return err; } } a->priv = (void *) p; - return p; + return 0; } EXPORT_SYMBOL(tcf_hash_create); -void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) +void tcf_hash_insert(struct tc_action *a) { + struct tcf_common *p = a->priv; + struct tcf_hashinfo *hinfo = a->ops->hinfo; unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); spin_lock_bh(&hinfo->lock); @@ -256,12 +280,13 @@ EXPORT_SYMBOL(tcf_hash_insert); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); -int tcf_register_action(struct tc_action_ops *act) +int tcf_register_action(struct tc_action_ops *act, unsigned int mask) { struct tc_action_ops *a; + int err; - /* Must supply act, dump, cleanup and init */ - if (!act->act || !act->dump || !act->cleanup || !act->init) + /* Must supply act, dump and init */ + if (!act->act || !act->dump || !act->init) return -EINVAL; /* Supply defaults */ @@ -270,10 +295,21 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->walk) act->walk = tcf_generic_walker; + act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL); + if (!act->hinfo) + return -ENOMEM; + err = tcf_hashinfo_init(act->hinfo, mask); + if (err) { + kfree(act->hinfo); + return err; + } + write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); + tcf_hashinfo_destroy(act->hinfo); + kfree(act->hinfo); return -EEXIST; } } @@ -292,6 +328,8 @@ int tcf_unregister_action(struct tc_action_ops *act) list_for_each_entry(a, &act_base, head) { if (a == act) { list_del(&act->head); + tcf_hashinfo_destroy(act->hinfo); + kfree(act->hinfo); err = 0; break; } @@ -368,16 +406,21 @@ exec_done: } EXPORT_SYMBOL(tcf_action_exec); -void tcf_action_destroy(struct list_head *actions, int bind) +int tcf_action_destroy(struct list_head *actions, int bind) { struct tc_action *a, *tmp; + int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - if (a->ops->cleanup(a, bind) == ACT_P_DELETED) + ret = tcf_hash_release(a, bind); + if (ret == ACT_P_DELETED) module_put(a->ops->owner); + else if (ret < 0) + return ret; list_del(&a->list); kfree(a); } + return ret; } int @@ -642,6 +685,20 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, return rtnl_unicast(skb, net, portid); } +static struct tc_action *create_a(int i) +{ + struct tc_action *act; + + act = kzalloc(sizeof(*act), GFP_KERNEL); + if (act == NULL) { + pr_debug("create_a: failed to alloc!\n"); + return NULL; + } + act->order = i; + INIT_LIST_HEAD(&act->list); + return act; +} + static struct tc_action * tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { @@ -661,11 +718,10 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) index = nla_get_u32(tb[TCA_ACT_INDEX]); err = -ENOMEM; - a = kzalloc(sizeof(struct tc_action), GFP_KERNEL); + a = create_a(0); if (a == NULL) goto err_out; - INIT_LIST_HEAD(&a->list); err = -EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (a->ops == NULL) /* could happen in batch of actions */ @@ -695,20 +751,6 @@ static void cleanup_a(struct list_head *actions) } } -static struct tc_action *create_a(int i) -{ - struct tc_action *act; - - act = kzalloc(sizeof(*act), GFP_KERNEL); - if (act == NULL) { - pr_debug("create_a: failed to alloc!\n"); - return NULL; - } - act->order = i; - INIT_LIST_HEAD(&act->list); - return act; -} - static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid) { @@ -720,18 +762,12 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlattr *nest; struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; - struct tc_action *a = create_a(0); + struct tc_action a; int err = -ENOMEM; - if (a == NULL) { - pr_debug("tca_action_flush: couldnt create tc_action\n"); - return err; - } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { pr_debug("tca_action_flush: failed skb alloc\n"); - kfree(a); return err; } @@ -743,8 +779,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, err = -EINVAL; kind = tb[TCA_ACT_KIND]; - a->ops = tc_lookup_action(kind); - if (a->ops == NULL) /*some idjot trying to flush unknown action */ + memset(&a, 0, sizeof(struct tc_action)); + INIT_LIST_HEAD(&a.list); + a.ops = tc_lookup_action(kind); + if (a.ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); @@ -759,7 +797,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (nest == NULL) goto out_module_put; - err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); + err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); if (err < 0) goto out_module_put; if (err == 0) @@ -769,8 +807,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_len = skb_tail_pointer(skb) - b; nlh->nlmsg_flags |= NLM_F_ROOT; - module_put(a->ops->owner); - kfree(a); + module_put(a.ops->owner); err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) @@ -779,11 +816,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, return err; out_module_put: - module_put(a->ops->owner); + module_put(a.ops->owner); err_out: noflush_out: kfree_skb(skb); - kfree(a); return err; } @@ -805,7 +841,11 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, } /* now do the delete */ - tcf_action_destroy(actions, 0); + ret = tcf_action_destroy(actions, 0); + if (ret < 0) { + kfree_skb(skb); + return ret; + } ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 2210187c45c..edbf40dac70 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,7 +37,6 @@ #include <net/tc_act/tc_csum.h> #define CSUM_TAB_MASK 15 -static struct tcf_hashinfo csum_hash_info; static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, @@ -48,7 +47,6 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, { struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tc_csum *parm; - struct tcf_common *pc; struct tcf_csum *p; int ret = 0, err; @@ -63,38 +61,31 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + if (ret) + return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(pc, bind, a->ops->hinfo); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; } - p = to_tcf_csum(pc); + p = to_tcf_csum(a); spin_lock_bh(&p->tcf_lock); p->tcf_action = parm->action; p->update_flags = parm->update_flags; spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_csum_cleanup(struct tc_action *a, int bind) -{ - struct tcf_csum *p = a->priv; - return tcf_hash_release(&p->common, bind, &csum_hash_info); -} - /** * tcf_csum_skb_nextlayer - Get next layer pointer * @skb: sk_buff to use @@ -569,12 +560,10 @@ nla_put_failure: static struct tc_action_ops act_csum_ops = { .kind = "csum", - .hinfo = &csum_hash_info, .type = TCA_ACT_CSUM, .owner = THIS_MODULE, .act = tcf_csum, .dump = tcf_csum_dump, - .cleanup = tcf_csum_cleanup, .init = tcf_csum_init, }; @@ -583,11 +572,7 @@ MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { - int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK); - if (err) - return err; - - return tcf_register_action(&act_csum_ops); + return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK); } static void __exit csum_cleanup_module(void) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a0eed30d581..d6bcbd9f779 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -24,7 +24,6 @@ #include <net/tc_act/tc_gact.h> #define GACT_TAB_MASK 15 -static struct tcf_hashinfo gact_hash_info; #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) @@ -57,7 +56,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_GACT_MAX + 1]; struct tc_gact *parm; struct tcf_gact *gact; - struct tcf_common *pc; int ret = 0; int err; #ifdef CONFIG_GACT_PROB @@ -86,21 +84,20 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind); + if (ret) + return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(pc, bind, a->ops->hinfo); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; } - gact = to_gact(pc); + gact = to_gact(a); spin_lock_bh(&gact->tcf_lock); gact->tcf_action = parm->action; @@ -113,19 +110,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_gact_cleanup(struct tc_action *a, int bind) -{ - struct tcf_gact *gact = a->priv; - - if (gact) - return tcf_hash_release(&gact->common, bind, a->ops->hinfo); - return 0; -} - static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -191,12 +179,10 @@ nla_put_failure: static struct tc_action_ops act_gact_ops = { .kind = "gact", - .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .owner = THIS_MODULE, .act = tcf_gact, .dump = tcf_gact_dump, - .cleanup = tcf_gact_cleanup, .init = tcf_gact_init, }; @@ -206,21 +192,17 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { - int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK); - if (err) - return err; #ifdef CONFIG_GACT_PROB pr_info("GACT probability on\n"); #else pr_info("GACT probability NOT on\n"); #endif - return tcf_register_action(&act_gact_ops); + return tcf_register_action(&act_gact_ops, GACT_TAB_MASK); } static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); - tcf_hashinfo_destroy(&gact_hash_info); } module_init(gact_init_module); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0a6d6217402..8a64a0734ae 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -29,7 +29,6 @@ #define IPT_TAB_MASK 15 -static struct tcf_hashinfo ipt_hash_info; static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) { @@ -69,22 +68,12 @@ static void ipt_destroy_target(struct xt_entry_target *t) module_put(par.target->me); } -static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) +static void tcf_ipt_release(struct tc_action *a, int bind) { - int ret = 0; - if (ipt) { - if (bind) - ipt->tcf_bindcnt--; - ipt->tcf_refcnt--; - if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { - ipt_destroy_target(ipt->tcfi_t); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - tcf_hash_destroy(&ipt->common, &ipt_hash_info); - ret = ACT_P_DELETED; - } - } - return ret; + struct tcf_ipt *ipt = to_ipt(a); + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); } static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { @@ -99,7 +88,6 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, { struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; - struct tcf_common *pc; struct xt_entry_target *td, *t; char *tname; int ret = 0, err; @@ -125,21 +113,20 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - pc = tcf_hash_check(index, a, bind); - if (!pc) { - pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(index, a, bind) ) { + ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind); + if (ret) + return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_ipt_release(to_ipt(pc), bind); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; } - ipt = to_ipt(pc); + ipt = to_ipt(a); hook = nla_get_u32(tb[TCA_IPT_HOOK]); @@ -170,7 +157,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; err3: @@ -178,21 +165,11 @@ err3: err2: kfree(tname); err1: - if (ret == ACT_P_CREATED) { - if (est) - gen_kill_estimator(&pc->tcfc_bstats, - &pc->tcfc_rate_est); - kfree_rcu(pc, tcfc_rcu); - } + if (ret == ACT_P_CREATED) + tcf_hash_cleanup(a, est); return err; } -static int tcf_ipt_cleanup(struct tc_action *a, int bind) -{ - struct tcf_ipt *ipt = a->priv; - return tcf_ipt_release(ipt, bind); -} - static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -284,23 +261,21 @@ nla_put_failure: static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - .hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, .owner = THIS_MODULE, .act = tcf_ipt, .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_cleanup, + .cleanup = tcf_ipt_release, .init = tcf_ipt_init, }; static struct tc_action_ops act_xt_ops = { .kind = "xt", - .hinfo = &ipt_hash_info, .type = TCA_ACT_XT, .owner = THIS_MODULE, .act = tcf_ipt, .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_cleanup, + .cleanup = tcf_ipt_release, .init = tcf_ipt_init, }; @@ -311,20 +286,16 @@ MODULE_ALIAS("act_xt"); static int __init ipt_init_module(void) { - int ret1, ret2, err; - err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK); - if (err) - return err; + int ret1, ret2; - ret1 = tcf_register_action(&act_xt_ops); + ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK); if (ret1 < 0) printk("Failed to load xt action\n"); - ret2 = tcf_register_action(&act_ipt_ops); + ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK); if (ret2 < 0) printk("Failed to load ipt action\n"); if (ret1 < 0 && ret2 < 0) { - tcf_hashinfo_destroy(&ipt_hash_info); return ret1; } else return 0; @@ -334,7 +305,6 @@ static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_xt_ops); tcf_unregister_action(&act_ipt_ops); - tcf_hashinfo_destroy(&ipt_hash_info); } module_init(ipt_init_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0b2c6d39d39..4f912c0e225 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -31,23 +31,13 @@ #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); -static struct tcf_hashinfo mirred_hash_info; -static int tcf_mirred_release(struct tcf_mirred *m, int bind) +static void tcf_mirred_release(struct tc_action *a, int bind) { - if (m) { - if (bind) - m->tcf_bindcnt--; - m->tcf_refcnt--; - if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - list_del(&m->tcfm_list); - if (m->tcfm_dev) - dev_put(m->tcfm_dev); - tcf_hash_destroy(&m->common, &mirred_hash_info); - return 1; - } - } - return 0; + struct tcf_mirred *m = to_mirred(a); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -61,7 +51,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tc_mirred *parm; struct tcf_mirred *m; - struct tcf_common *pc; struct net_device *dev; int ret, ok_push = 0; @@ -101,21 +90,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev = NULL; } - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { + if (!tcf_hash_check(parm->index, a, bind)) { if (dev == NULL) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind); + if (ret) + return ret; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_mirred_release(to_mirred(pc), bind); + tcf_hash_release(a, bind); return -EEXIST; } } - m = to_mirred(pc); + m = to_mirred(a); spin_lock_bh(&m->tcf_lock); m->tcf_action = parm->action; @@ -131,21 +119,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) { list_add(&m->tcfm_list, &mirred_list); - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); } return ret; } -static int tcf_mirred_cleanup(struct tc_action *a, int bind) -{ - struct tcf_mirred *m = a->priv; - - if (m) - return tcf_mirred_release(m, bind); - return 0; -} - static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -254,12 +233,11 @@ static struct notifier_block mirred_device_notifier = { static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, .dump = tcf_mirred_dump, - .cleanup = tcf_mirred_cleanup, + .cleanup = tcf_mirred_release, .init = tcf_mirred_init, }; @@ -273,19 +251,13 @@ static int __init mirred_init_module(void) if (err) return err; - err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK); - if (err) { - unregister_netdevice_notifier(&mirred_device_notifier); - return err; - } pr_info("Mirror/redirect action on\n"); - return tcf_register_action(&act_mirred_ops); + return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK); } static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops); - tcf_hashinfo_destroy(&mirred_hash_info); unregister_netdevice_notifier(&mirred_device_notifier); } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 81f0404bb33..270a030d5fd 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -31,8 +31,6 @@ #define NAT_TAB_MASK 15 -static struct tcf_hashinfo nat_hash_info; - static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, }; @@ -44,7 +42,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; - struct tcf_common *pc; if (nla == NULL) return -EINVAL; @@ -57,20 +54,19 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + if (ret) + return ret; ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(pc, bind, a->ops->hinfo); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; } - p = to_tcf_nat(pc); + p = to_tcf_nat(a); spin_lock_bh(&p->tcf_lock); p->old_addr = parm->old_addr; @@ -82,18 +78,11 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_nat_cleanup(struct tc_action *a, int bind) -{ - struct tcf_nat *p = a->priv; - - return tcf_hash_release(&p->common, bind, &nat_hash_info); -} - static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -293,12 +282,10 @@ nla_put_failure: static struct tc_action_ops act_nat_ops = { .kind = "nat", - .hinfo = &nat_hash_info, .type = TCA_ACT_NAT, .owner = THIS_MODULE, .act = tcf_nat, .dump = tcf_nat_dump, - .cleanup = tcf_nat_cleanup, .init = tcf_nat_init, }; @@ -307,16 +294,12 @@ MODULE_LICENSE("GPL"); static int __init nat_init_module(void) { - int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); - if (err) - return err; - return tcf_register_action(&act_nat_ops); + return tcf_register_action(&act_nat_ops, NAT_TAB_MASK); } static void __exit nat_cleanup_module(void) { tcf_unregister_action(&act_nat_ops); - tcf_hashinfo_destroy(&nat_hash_info); } module_init(nat_init_module); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index be3f0f6875b..5f9bcb2e080 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -25,8 +25,6 @@ #define PEDIT_TAB_MASK 15 -static struct tcf_hashinfo pedit_hash_info; - static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, }; @@ -39,7 +37,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct tc_pedit *parm; int ret = 0, err; struct tcf_pedit *p; - struct tcf_common *pc; struct tc_pedit_key *keys = NULL; int ksize; @@ -57,26 +54,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) return -EINVAL; - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { + if (!tcf_hash_check(parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); - p = to_pedit(pc); + ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); + if (ret) + return ret; + p = to_pedit(a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - if (est) - gen_kill_estimator(&pc->tcfc_bstats, - &pc->tcfc_rate_est); - kfree_rcu(pc, tcfc_rcu); + tcf_hash_cleanup(a, est); return -ENOMEM; } ret = ACT_P_CREATED; } else { - p = to_pedit(pc); - tcf_hash_release(pc, bind, a->ops->hinfo); + p = to_pedit(a); + tcf_hash_release(a, bind); if (bind) return 0; if (!ovr) @@ -100,22 +93,15 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_pedit_cleanup(struct tc_action *a, int bind) +static void tcf_pedit_cleanup(struct tc_action *a, int bind) { struct tcf_pedit *p = a->priv; - - if (p) { - struct tc_pedit_key *keys = p->tcfp_keys; - if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { - kfree(keys); - return 1; - } - } - return 0; + struct tc_pedit_key *keys = p->tcfp_keys; + kfree(keys); } static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, @@ -230,7 +216,6 @@ nla_put_failure: static struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, .owner = THIS_MODULE, .act = tcf_pedit, @@ -245,15 +230,11 @@ MODULE_LICENSE("GPL"); static int __init pedit_init_module(void) { - int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK); - if (err) - return err; - return tcf_register_action(&act_pedit_ops); + return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK); } static void __exit pedit_cleanup_module(void) { - tcf_hashinfo_destroy(&pedit_hash_info); tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 1778209a332..0566e4606a4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -41,7 +41,6 @@ struct tcf_police { container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 -static struct tcf_hashinfo police_hash_info; /* old policer structure from before tc actions */ struct tc_police_compat { @@ -234,7 +233,7 @@ override: police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(a->ops->hinfo); + tcf_hash_new_index(hinfo); h = tcf_hash(police->tcf_index, POL_TAB_MASK); spin_lock_bh(&hinfo->lock); hlist_add_head(&police->tcf_head, &hinfo->htab[h]); @@ -253,14 +252,6 @@ failure: return err; } -static int tcf_act_police_cleanup(struct tc_action *a, int bind) -{ - struct tcf_police *p = a->priv; - if (p) - return tcf_hash_release(&p->common, bind, &police_hash_info); - return 0; -} - static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -357,12 +348,10 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .hinfo = &police_hash_info, .type = TCA_ID_POLICE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, - .cleanup = tcf_act_police_cleanup, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -370,19 +359,12 @@ static struct tc_action_ops act_police_ops = { static int __init police_init_module(void) { - int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK); - if (err) - return err; - err = tcf_register_action(&act_police_ops); - if (err) - tcf_hashinfo_destroy(&police_hash_info); - return err; + return tcf_register_action(&act_police_ops, POL_TAB_MASK); } static void __exit police_cleanup_module(void) { - tcf_hashinfo_destroy(&police_hash_info); tcf_unregister_action(&act_police_ops); } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8ef2f1fcbfb..992c2317ce8 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -25,7 +25,6 @@ #include <net/tc_act/tc_defact.h> #define SIMP_TAB_MASK 7 -static struct tcf_hashinfo simp_hash_info; #define SIMP_MAX_DATA 32 static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, @@ -47,20 +46,10 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, return d->tcf_action; } -static int tcf_simp_release(struct tcf_defact *d, int bind) +static void tcf_simp_release(struct tc_action *a, int bind) { - int ret = 0; - if (d) { - if (bind) - d->tcf_bindcnt--; - d->tcf_refcnt--; - if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { - kfree(d->tcfd_defdata); - tcf_hash_destroy(&d->common, &simp_hash_info); - ret = 1; - } - } - return ret; + struct tcf_defact *d = to_defact(a); + kfree(d->tcfd_defdata); } static int alloc_defdata(struct tcf_defact *d, char *defdata) @@ -94,7 +83,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_DEF_MAX + 1]; struct tc_defact *parm; struct tcf_defact *d; - struct tcf_common *pc; char *defdata; int ret = 0, err; @@ -114,29 +102,25 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_DEF_PARMS]); defdata = nla_data(tb[TCA_DEF_DATA]); - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + if (ret) + return ret; - d = to_defact(pc); + d = to_defact(a); ret = alloc_defdata(d, defdata); if (ret < 0) { - if (est) - gen_kill_estimator(&pc->tcfc_bstats, - &pc->tcfc_rate_est); - kfree_rcu(pc, tcfc_rcu); + tcf_hash_cleanup(a, est); return ret; } d->tcf_action = parm->action; ret = ACT_P_CREATED; } else { - d = to_defact(pc); + d = to_defact(a); if (bind) return 0; - tcf_simp_release(d, bind); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; @@ -144,19 +128,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_simp_cleanup(struct tc_action *a, int bind) -{ - struct tcf_defact *d = a->priv; - - if (d) - return tcf_simp_release(d, bind); - return 0; -} - static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -187,12 +162,11 @@ nla_put_failure: static struct tc_action_ops act_simp_ops = { .kind = "simple", - .hinfo = &simp_hash_info, .type = TCA_ACT_SIMP, .owner = THIS_MODULE, .act = tcf_simp, .dump = tcf_simp_dump, - .cleanup = tcf_simp_cleanup, + .cleanup = tcf_simp_release, .init = tcf_simp_init, }; @@ -202,23 +176,15 @@ MODULE_LICENSE("GPL"); static int __init simp_init_module(void) { - int err, ret; - err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK); - if (err) - return err; - - ret = tcf_register_action(&act_simp_ops); + int ret; + ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK); if (!ret) pr_info("Simple TC action Loaded\n"); - else - tcf_hashinfo_destroy(&simp_hash_info); - return ret; } static void __exit simp_cleanup_module(void) { - tcf_hashinfo_destroy(&simp_hash_info); tcf_unregister_action(&act_simp_ops); } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 98725080b5a..fcfeeaf838b 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -28,7 +28,6 @@ #include <net/tc_act/tc_skbedit.h> #define SKBEDIT_TAB_MASK 15 -static struct tcf_hashinfo skbedit_hash_info; static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) @@ -65,7 +64,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; struct tcf_skbedit *d; - struct tcf_common *pc; u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; int ret = 0, err; @@ -100,19 +98,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - pc = tcf_hash_check(parm->index, a, bind); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); - if (IS_ERR(pc)) - return PTR_ERR(pc); + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); + if (ret) + return ret; - d = to_skbedit(pc); + d = to_skbedit(a); ret = ACT_P_CREATED; } else { - d = to_skbedit(pc); + d = to_skbedit(a); if (bind) return 0; - tcf_hash_release(pc, bind, a->ops->hinfo); + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; } @@ -132,19 +129,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, a->ops->hinfo); + tcf_hash_insert(a); return ret; } -static int tcf_skbedit_cleanup(struct tc_action *a, int bind) -{ - struct tcf_skbedit *d = a->priv; - - if (d) - return tcf_hash_release(&d->common, bind, &skbedit_hash_info); - return 0; -} - static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -186,12 +174,10 @@ nla_put_failure: static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", - .hinfo = &skbedit_hash_info, .type = TCA_ACT_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit, .dump = tcf_skbedit_dump, - .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, }; @@ -201,15 +187,11 @@ MODULE_LICENSE("GPL"); static int __init skbedit_init_module(void) { - int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK); - if (err) - return err; - return tcf_register_action(&act_skbedit_ops); + return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK); } static void __exit skbedit_cleanup_module(void) { - tcf_hashinfo_destroy(&skbedit_hash_info); tcf_unregister_action(&act_skbedit_ops); } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index a366537f82c..63a3ce75c02 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -29,11 +29,11 @@ #include <net/act_api.h> #include <net/pkt_cls.h> -#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) +#define HTSIZE 256 struct fw_head { - struct fw_filter *ht[HTSIZE]; - u32 mask; + u32 mask; + struct fw_filter *ht[HTSIZE]; }; struct fw_filter { @@ -46,30 +46,11 @@ struct fw_filter { struct tcf_exts exts; }; -static inline int fw_hash(u32 handle) +static u32 fw_hash(u32 handle) { - if (HTSIZE == 4096) - return ((handle >> 24) & 0xFFF) ^ - ((handle >> 12) & 0xFFF) ^ - (handle & 0xFFF); - else if (HTSIZE == 2048) - return ((handle >> 22) & 0x7FF) ^ - ((handle >> 11) & 0x7FF) ^ - (handle & 0x7FF); - else if (HTSIZE == 1024) - return ((handle >> 20) & 0x3FF) ^ - ((handle >> 10) & 0x3FF) ^ - (handle & 0x3FF); - else if (HTSIZE == 512) - return (handle >> 27) ^ - ((handle >> 18) & 0x1FF) ^ - ((handle >> 9) & 0x1FF) ^ - (handle & 0x1FF); - else if (HTSIZE == 256) { - u8 *t = (u8 *) &handle; - return t[0] ^ t[1] ^ t[2] ^ t[3]; - } else - return handle & (HTSIZE - 1); + handle ^= (handle >> 16); + handle ^= (handle >> 8); + return handle % HTSIZE; } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1313145e3b8..a0b84e0e22d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -273,11 +273,12 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_list_add(struct Qdisc *q) { - struct Qdisc *root = qdisc_dev(q)->qdisc; + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + struct Qdisc *root = qdisc_dev(q)->qdisc; - WARN_ON_ONCE(root == &noop_qdisc); - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + WARN_ON_ONCE(root == &noop_qdisc); list_add_tail(&q->list, &root->list); + } } EXPORT_SYMBOL(qdisc_list_add); @@ -1303,6 +1304,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; + cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; @@ -1434,9 +1436,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; - rcu_read_lock(); idx = 0; - for_each_netdev_rcu(net, dev) { + ASSERT_RTNL(); + for_each_netdev(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1459,8 +1461,6 @@ cont: } done: - rcu_read_unlock(); - cb->args[0] = idx; cb->args[1] = q_idx; @@ -1617,6 +1617,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; + cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1f9c31411f1..8449b337f9e 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -623,8 +623,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) goto nla_put_failure; } - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 2f80d01d42a..ead526467cc 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1563,8 +1563,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; if (cbq_dump_attr(skb, &q->link) < 0) goto nla_put_failure; - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); @@ -1599,8 +1598,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, goto nla_put_failure; if (cbq_dump_attr(skb, cl) < 0) goto nla_put_failure; - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 08ef7a42c0e..23c682b42f9 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -601,6 +601,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; + void *old_fq_root; u32 idx; if (q->fq_root && log == q->fq_trees_log) @@ -615,13 +616,19 @@ static int fq_resize(struct Qdisc *sch, u32 log) for (idx = 0; idx < (1U << log); idx++) array[idx] = RB_ROOT; - if (q->fq_root) { - fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - fq_free(q->fq_root); - } + sch_tree_lock(sch); + + old_fq_root = q->fq_root; + if (old_fq_root) + fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); + q->fq_root = array; q->fq_trees_log = log; + sch_tree_unlock(sch); + + fq_free(old_fq_root); + return 0; } @@ -697,9 +704,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } - if (!err) + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); - + sch_tree_lock(sch); + } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); @@ -772,8 +781,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; - nla_nest_end(skb, opts); - return skb->len; + return nla_nest_end(skb, opts); nla_put_failure: return -1; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index ba5bc929eac..0bf432c782c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -450,8 +450,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; - nla_nest_end(skb, opts); - return skb->len; + return nla_nest_end(skb, opts); nla_put_failure: return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e82e43b69c3..e1543b03e39 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -310,6 +310,7 @@ void netif_carrier_on(struct net_device *dev) if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; + atomic_inc(&dev->carrier_changes); linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -328,6 +329,7 @@ void netif_carrier_off(struct net_device *dev) if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; + atomic_inc(&dev->carrier_changes); linkwatch_fire_event(dev); } } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c4075610502..ec8aeaac1dd 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1353,8 +1353,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, goto nla_put_failure; if (hfsc_dump_curves(skb, cl) < 0) goto nla_put_failure; - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 647680b1c62..edee03d922e 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -691,8 +691,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) goto nla_put_failure; - nla_nest_end(skb, opts); - return skb->len; + return nla_nest_end(skb, opts); nla_put_failure: return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 722e137df24..9f949abcace 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1062,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(root_lock); + /* Its safe to not acquire qdisc lock. As we hold RTNL, + * no change can happen on the qdisc parameters. + */ gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1081,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; - nla_nest_end(skb, nest); - spin_unlock_bh(root_lock); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: - spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1096,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(root_lock); + /* Its safe to not acquire qdisc lock. As we hold RTNL, + * no change can happen on the class parameters. + */ tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) @@ -1128,12 +1127,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps)) goto nla_put_failure; - nla_nest_end(skb, nest); - spin_unlock_bh(root_lock); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: - spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index bce1665239b..62871c14e1f 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -100,8 +100,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index de1059af6da..f1669a00f57 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -117,6 +117,11 @@ struct netem_sched_data { LOST_IN_BURST_PERIOD, } _4_state_model; + enum { + GOOD_STATE = 1, + BAD_STATE, + } GE_state_model; + /* Correlated Loss Generation models */ struct clgstate { /* state of the Markov chain */ @@ -272,15 +277,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q) struct clgstate *clg = &q->clg; switch (clg->state) { - case 1: + case GOOD_STATE: if (prandom_u32() < clg->a1) - clg->state = 2; + clg->state = BAD_STATE; if (prandom_u32() < clg->a4) return true; break; - case 2: + case BAD_STATE: if (prandom_u32() < clg->a2) - clg->state = 1; + clg->state = GOOD_STATE; if (prandom_u32() > clg->a3) return true; } @@ -689,9 +694,8 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) return 0; } -static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) +static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corr *c = nla_data(attr); init_crandom(&q->delay_cor, c->delay_corr); @@ -699,27 +703,24 @@ static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->dup_cor, c->dup_corr); } -static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) +static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_reorder *r = nla_data(attr); q->reorder = r->probability; init_crandom(&q->reorder_cor, r->correlation); } -static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) +static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corrupt *r = nla_data(attr); q->corrupt = r->probability; init_crandom(&q->corrupt_cor, r->correlation); } -static void get_rate(struct Qdisc *sch, const struct nlattr *attr) +static void get_rate(struct netem_sched_data *q, const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_rate *r = nla_data(attr); q->rate = r->rate; @@ -732,9 +733,8 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr) q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; } -static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) +static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr) { - struct netem_sched_data *q = qdisc_priv(sch); const struct nlattr *la; int rem; @@ -752,7 +752,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) q->loss_model = CLG_4_STATES; - q->clg.state = 1; + q->clg.state = TX_IN_GAP_PERIOD; q->clg.a1 = gi->p13; q->clg.a2 = gi->p31; q->clg.a3 = gi->p32; @@ -770,7 +770,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) } q->loss_model = CLG_GILB_ELL; - q->clg.state = 1; + q->clg.state = GOOD_STATE; q->clg.a1 = ge->p; q->clg.a2 = ge->r; q->clg.a3 = ge->h; @@ -821,6 +821,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; struct tc_netem_qopt *qopt; + struct clgstate old_clg; + int old_loss_model = CLG_RANDOM; int ret; if (opt == NULL) @@ -831,6 +833,33 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (ret < 0) return ret; + /* backup q->clg and q->loss_model */ + old_clg = q->clg; + old_loss_model = q->loss_model; + + if (tb[TCA_NETEM_LOSS]) { + ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); + if (ret) { + q->loss_model = old_loss_model; + return ret; + } + } else { + q->loss_model = CLG_RANDOM; + } + + if (tb[TCA_NETEM_DELAY_DIST]) { + ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); + if (ret) { + /* recover clg and loss_model, in case of + * q->clg and q->loss_model were modified + * in get_loss_clg() + */ + q->clg = old_clg; + q->loss_model = old_loss_model; + return ret; + } + } + sch->limit = qopt->limit; q->latency = qopt->latency; @@ -848,22 +877,16 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) q->reorder = ~0; if (tb[TCA_NETEM_CORR]) - get_correlation(sch, tb[TCA_NETEM_CORR]); - - if (tb[TCA_NETEM_DELAY_DIST]) { - ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); - if (ret) - return ret; - } + get_correlation(q, tb[TCA_NETEM_CORR]); if (tb[TCA_NETEM_REORDER]) - get_reorder(sch, tb[TCA_NETEM_REORDER]); + get_reorder(q, tb[TCA_NETEM_REORDER]); if (tb[TCA_NETEM_CORRUPT]) - get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); + get_corrupt(q, tb[TCA_NETEM_CORRUPT]); if (tb[TCA_NETEM_RATE]) - get_rate(sch, tb[TCA_NETEM_RATE]); + get_rate(q, tb[TCA_NETEM_RATE]); if (tb[TCA_NETEM_RATE64]) q->rate = max_t(u64, q->rate, @@ -872,10 +895,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); - q->loss_model = CLG_RANDOM; - if (tb[TCA_NETEM_LOSS]) - ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); - return ret; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a255d0200a5..fefeeb73f15 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -15,6 +15,11 @@ * * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no> * University of Oslo, Norway. + * + * References: + * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00 + * IEEE Conference on High Performance Switching and Routing 2013 : + * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem" */ #include <linux/module.h> @@ -36,7 +41,7 @@ struct pie_params { psched_time_t target; /* user specified target delay in pschedtime */ u32 tupdate; /* timer frequency (in jiffies) */ u32 limit; /* number of packets that can be enqueued */ - u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 alpha; /* alpha and beta are between 0 and 32 */ u32 beta; /* and are used for shift relative to 1 */ bool ecn; /* true if ecn is enabled */ bool bytemode; /* to scale drop early prob based on pkt size */ @@ -326,10 +331,16 @@ static void calculate_probability(struct Qdisc *sch) if (qdelay == 0 && qlen != 0) update_prob = false; - /* Add ranges for alpha and beta, more aggressive for high dropping - * mode and gentle steps for light dropping mode - * In light dropping mode, take gentle steps; in medium dropping mode, - * take medium steps; in high dropping mode, take big steps. + /* In the algorithm, alpha and beta are between 0 and 2 with typical + * value for alpha as 0.125. In this implementation, we use values 0-32 + * passed from user space to represent this. Also, alpha and beta have + * unit of HZ and need to be scaled before they can used to update + * probability. alpha/beta are updated locally below by 1) scaling them + * appropriately 2) scaling down by 16 to come to 0-2 range. + * Please see paper for details. + * + * We scale alpha and beta differently depending on whether we are in + * light, medium or high dropping mode. */ if (q->vars.prob < MAX_PROB / 100) { alpha = diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 1cb413fead8..18ff6343370 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -101,12 +101,11 @@ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ + u32 max_size; s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ s64 mtu; - u32 max_size; struct psched_ratecfg rate; struct psched_ratecfg peak; - bool peak_present; /* Variables */ s64 tokens; /* Current number of B tokens */ @@ -222,6 +221,11 @@ static unsigned int tbf_drop(struct Qdisc *sch) return len; } +static bool tbf_peak_present(const struct tbf_sched_data *q) +{ + return q->peak.rate_bytes_ps; +} + static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -238,7 +242,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) now = ktime_to_ns(ktime_get()); toks = min_t(s64, now - q->t_c, q->buffer); - if (q->peak_present) { + if (tbf_peak_present(q)) { ptoks = toks + q->ptokens; if (ptoks > q->mtu) ptoks = q->mtu; @@ -334,18 +338,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB])); - if (q->qdisc != &noop_qdisc) { - err = fifo_set_limit(q->qdisc, qopt->limit); - if (err) - goto done; - } else if (qopt->limit > 0) { - child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); - if (IS_ERR(child)) { - err = PTR_ERR(child); - goto done; - } - } - buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); @@ -378,6 +370,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) } else { max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); } + } else { + memset(&peak, 0, sizeof(peak)); } if (max_size < psched_mtu(qdisc_dev(sch))) @@ -390,6 +384,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; } + if (q->qdisc != &noop_qdisc) { + err = fifo_set_limit(q->qdisc, qopt->limit); + if (err) + goto done; + } else if (qopt->limit > 0) { + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto done; + } + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); @@ -410,12 +416,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->ptokens = q->mtu; memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); - if (qopt->peakrate.rate) { - memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); - q->peak_present = true; - } else { - q->peak_present = false; - } + memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); err = 0; @@ -458,7 +459,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) opt.limit = q->limit; psched_ratecfg_getrate(&opt.rate, &q->rate); - if (q->peak_present) + if (tbf_peak_present(q)) psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); @@ -469,13 +470,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) if (q->rate.rate_bytes_ps >= (1ULL << 32) && nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps)) goto nla_put_failure; - if (q->peak_present && + if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps)) goto nla_put_failure; - nla_nest_end(skb, nest); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5ae60920067..39579c3e0d1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1239,78 +1239,106 @@ void sctp_assoc_update(struct sctp_association *asoc, } /* Update the retran path for sending a retransmitted packet. - * Round-robin through the active transports, else round-robin - * through the inactive transports as this is the next best thing - * we can try. + * See also RFC4960, 6.4. Multi-Homed SCTP Endpoints: + * + * When there is outbound data to send and the primary path + * becomes inactive (e.g., due to failures), or where the + * SCTP user explicitly requests to send data to an + * inactive destination transport address, before reporting + * an error to its ULP, the SCTP endpoint should try to send + * the data to an alternate active destination transport + * address if one exists. + * + * When retransmitting data that timed out, if the endpoint + * is multihomed, it should consider each source-destination + * address pair in its retransmission selection policy. + * When retransmitting timed-out data, the endpoint should + * attempt to pick the most divergent source-destination + * pair from the original source-destination pair to which + * the packet was transmitted. + * + * Note: Rules for picking the most divergent source-destination + * pair are an implementation decision and are not specified + * within this document. + * + * Our basic strategy is to round-robin transports in priorities + * according to sctp_state_prio_map[] e.g., if no such + * transport with state SCTP_ACTIVE exists, round-robin through + * SCTP_UNKNOWN, etc. You get the picture. */ -void sctp_assoc_update_retran_path(struct sctp_association *asoc) +static const u8 sctp_trans_state_to_prio_map[] = { + [SCTP_ACTIVE] = 3, /* best case */ + [SCTP_UNKNOWN] = 2, + [SCTP_PF] = 1, + [SCTP_INACTIVE] = 0, /* worst case */ +}; + +static u8 sctp_trans_score(const struct sctp_transport *trans) { - struct sctp_transport *t, *next; - struct list_head *head = &asoc->peer.transport_addr_list; - struct list_head *pos; + return sctp_trans_state_to_prio_map[trans->state]; +} - if (asoc->peer.transport_count == 1) - return; +static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, + struct sctp_transport *best) +{ + if (best == NULL) + return curr; - /* Find the next transport in a round-robin fashion. */ - t = asoc->peer.retran_path; - pos = &t->transports; - next = NULL; + return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; +} - while (1) { - /* Skip the head. */ - if (pos->next == head) - pos = head->next; - else - pos = pos->next; +void sctp_assoc_update_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans = asoc->peer.retran_path; + struct sctp_transport *trans_next = NULL; - t = list_entry(pos, struct sctp_transport, transports); + /* We're done as we only have the one and only path. */ + if (asoc->peer.transport_count == 1) + return; + /* If active_path and retran_path are the same and active, + * then this is the only active path. Use it. + */ + if (asoc->peer.active_path == asoc->peer.retran_path && + asoc->peer.active_path->state == SCTP_ACTIVE) + return; - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; + /* Iterate from retran_path's successor back to retran_path. */ + for (trans = list_next_entry(trans, transports); 1; + trans = list_next_entry(trans, transports)) { + /* Manually skip the head element. */ + if (&trans->transports == &asoc->peer.transport_addr_list) + continue; + if (trans->state == SCTP_UNCONFIRMED) + continue; + trans_next = sctp_trans_elect_best(trans, trans_next); + /* Active is good enough for immediate return. */ + if (trans_next->state == SCTP_ACTIVE) break; - } - - /* Try to find an active transport. */ - - if ((t->state == SCTP_ACTIVE) || - (t->state == SCTP_UNKNOWN)) { + /* We've reached the end, time to update path. */ + if (trans == asoc->peer.retran_path) break; - } else { - /* Keep track of the next transport in case - * we don't find any active transport. - */ - if (t->state != SCTP_UNCONFIRMED && !next) - next = t; - } } - if (t) - asoc->peer.retran_path = t; - else - t = asoc->peer.retran_path; + asoc->peer.retran_path = trans_next; - pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, - &t->ipaddr.sa); + pr_debug("%s: association:%p updated new path to addr:%pISpc\n", + __func__, asoc, &asoc->peer.retran_path->ipaddr.sa); } -/* Choose the transport for sending retransmit packet. */ -struct sctp_transport *sctp_assoc_choose_alter_transport( - struct sctp_association *asoc, struct sctp_transport *last_sent_to) +struct sctp_transport * +sctp_assoc_choose_alter_transport(struct sctp_association *asoc, + struct sctp_transport *last_sent_to) { /* If this is the first time packet is sent, use the active path, * else use the retran path. If the last packet was sent over the * retran path, update the retran path and use it. */ - if (!last_sent_to) + if (last_sent_to == NULL) { return asoc->peer.active_path; - else { + } else { if (last_sent_to == asoc->peer.retran_path) sctp_assoc_update_retran_path(asoc); + return asoc->peer.retran_path; } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0f6259a6a93..2b1738ef939 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -662,6 +662,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, */ sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); + newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; + sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e1d0fcb028..c09757fbf80 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -957,7 +957,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, &transport->fl); + return ip_queue_xmit(&inet->sk, skb, &transport->fl); } static struct sctp_af sctp_af_inet; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 632090b961c..3a1767ef320 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bd859154000..5d6883ff00c 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -495,11 +495,12 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, } /* If the transport error count is greater than the pf_retrans - * threshold, and less than pathmaxrtx, then mark this transport - * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1, - * point 1 + * threshold, and less than pathmaxrtx, and if the current state + * is not SCTP_UNCONFIRMED, then mark this transport as Partially + * Failed, see SCTP Quick Failover Draft, section 5.1 */ if ((transport->state != SCTP_PF) && + (transport->state != SCTP_UNCONFIRMED) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 483dcd71b3c..ae9fbeba40b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -758,6 +758,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, struct sctp_chunk auth; sctp_ierror_t ret; + /* Make sure that we and the peer are AUTH capable */ + if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; @@ -768,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9e91d6e5df6..ff20e2dbbbc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -64,6 +64,7 @@ #include <linux/crypto.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/compat.h> #include <net/ip.h> #include <net/icmp.h> @@ -1368,11 +1369,19 @@ static int sctp_setsockopt_connectx(struct sock *sk, /* * New (hopefully final) interface for the API. * We use the sctp_getaddrs_old structure so that use-space library - * can avoid any unnecessary allocations. The only defferent part + * can avoid any unnecessary allocations. The only different part * is that we store the actual length of the address buffer into the - * addrs_num structure member. That way we can re-use the existing + * addrs_num structure member. That way we can re-use the existing * code. */ +#ifdef CONFIG_COMPAT +struct compat_sctp_getaddrs_old { + sctp_assoc_t assoc_id; + s32 addr_num; + compat_uptr_t addrs; /* struct sockaddr * */ +}; +#endif + static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -1381,16 +1390,30 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len, sctp_assoc_t assoc_id = 0; int err = 0; - if (len < sizeof(param)) - return -EINVAL; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + struct compat_sctp_getaddrs_old param32; - if (copy_from_user(¶m, optval, sizeof(param))) - return -EFAULT; + if (len < sizeof(param32)) + return -EINVAL; + if (copy_from_user(¶m32, optval, sizeof(param32))) + return -EFAULT; - err = __sctp_setsockopt_connectx(sk, - (struct sockaddr __user *)param.addrs, - param.addr_num, &assoc_id); + param.assoc_id = param32.assoc_id; + param.addr_num = param32.addr_num; + param.addrs = compat_ptr(param32.addrs); + } else +#endif + { + if (len < sizeof(param)) + return -EINVAL; + if (copy_from_user(¶m, optval, sizeof(param))) + return -EFAULT; + } + err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *) + param.addrs, param.addr_num, + &assoc_id); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) return -EFAULT; @@ -6576,6 +6599,46 @@ static void __sctp_write_space(struct sctp_association *asoc) } } +static void sctp_wake_up_waiters(struct sock *sk, + struct sctp_association *asoc) +{ + struct sctp_association *tmp = asoc; + + /* We do accounting for the sndbuf space per association, + * so we only need to wake our own association. + */ + if (asoc->ep->sndbuf_policy) + return __sctp_write_space(asoc); + + /* If association goes down and is just flushing its + * outq, then just normally notify others. + */ + if (asoc->base.dead) + return sctp_write_space(sk); + + /* Accounting for the sndbuf space is per socket, so we + * need to wake up others, try to be fair and in case of + * other associations, let them have a go first instead + * of just doing a sctp_write_space() call. + * + * Note that we reach sctp_wake_up_waiters() only when + * associations free up queued chunks, thus we are under + * lock and the list of associations on a socket is + * guaranteed not to change. + */ + for (tmp = list_next_entry(tmp, asocs); 1; + tmp = list_next_entry(tmp, asocs)) { + /* Manually skip the head element. */ + if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs)) + continue; + /* Wake up association. */ + __sctp_write_space(tmp); + /* We've reached the end. */ + if (tmp == asoc) + break; + } +} + /* Do accounting for the sndbuf space. * Decrement the used sndbuf space of the corresponding association by the * data size which was just transmitted(freed). @@ -6603,7 +6666,7 @@ static void sctp_wfree(struct sk_buff *skb) sk_mem_uncharge(sk, skb->truesize); sock_wfree(skb); - __sctp_write_space(asoc); + sctp_wake_up_waiters(sk, asoc); sctp_association_put(asoc); } @@ -6688,7 +6751,7 @@ do_nonblock: goto out; } -void sctp_data_ready(struct sock *sk, int len) +void sctp_data_ready(struct sock *sk) { struct socket_wq *wq; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 7135e617ab0..35c8923b555 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -151,6 +151,7 @@ static struct ctl_table sctp_net_table[] = { }, { .procname = "cookie_hmac_alg", + .data = &init_net.sctp.sctp_hmac_alg, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, @@ -401,15 +402,18 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, int sctp_sysctl_net_register(struct net *net) { - struct ctl_table *table; - int i; + struct ctl_table *table = sctp_net_table; + + if (!net_eq(net, &init_net)) { + int i; - table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); - if (!table) - return -ENOMEM; + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; - for (i = 0; table[i].data; i++) - table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + } net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); return 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d0810dc5f07..1d348d15b33 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -652,5 +652,4 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t) if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) sctp_transport_hold(t); } - return; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 5dc94117e9d..7144eb6a1b9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -259,7 +259,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return 1; out_free: @@ -1135,5 +1135,5 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) /* If there is data waiting, send it up the socket now. */ if (sctp_ulpq_clear_pd(ulpq) || ev) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } diff --git a/net/socket.c b/net/socket.c index 879933aaed4..abf56b2a14f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -72,6 +72,7 @@ #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> +#include <linux/ptp_classify.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/cache.h> @@ -450,16 +451,17 @@ EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { - struct file *file; + struct fd f = fdget(fd); struct socket *sock; *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) + if (f.file) { + sock = sock_from_file(f.file, err); + if (likely(sock)) { + *fput_needed = f.flags; return sock; - fput_light(file, *fput_needed); + } + fdput(f); } return NULL; } @@ -593,7 +595,7 @@ void sock_release(struct socket *sock) } if (rcu_dereference_protected(sock->wq, 1)->fasync_list) - printk(KERN_ERR "sock_release: fasync list not empty!\n"); + pr_err("%s: fasync list not empty!\n", __func__); if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) return; @@ -1265,8 +1267,8 @@ int __sock_create(struct net *net, int family, int type, int protocol, static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", - current->comm); + pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); } family = PF_PACKET; } @@ -1878,8 +1880,8 @@ out: * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned int flags) +SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, + unsigned int, flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1985,6 +1987,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, { if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; @@ -2595,8 +2601,7 @@ int sock_register(const struct net_proto_family *ops) int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, - NPROTO); + pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); return -ENOBUFS; } @@ -2610,7 +2615,7 @@ int sock_register(const struct net_proto_family *ops) } spin_unlock(&net_family_lock); - printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); + pr_info("NET: Registered protocol family %d\n", ops->family); return err; } EXPORT_SYMBOL(sock_register); @@ -2638,7 +2643,7 @@ void sock_unregister(int family) synchronize_rcu(); - printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); + pr_info("NET: Unregistered protocol family %d\n", family); } EXPORT_SYMBOL(sock_unregister); @@ -2681,9 +2686,7 @@ static int __init sock_init(void) goto out; #endif -#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING - skb_timestamping_init(); -#endif + ptp_classifier_init(); out: return err; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 241b54f3020..0754d0f466d 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -9,19 +9,6 @@ config SUNRPC_BACKCHANNEL bool depends on SUNRPC -config SUNRPC_XPRT_RDMA - tristate - depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS - default SUNRPC && INFINIBAND - help - This option allows the NFS client and server to support - an RDMA-enabled transport. - - To compile RPC client RDMA transport support as a module, - choose M here: the module will be called xprtrdma. - - If unsure, say N. - config SUNRPC_SWAP bool depends on SUNRPC @@ -57,3 +44,29 @@ config SUNRPC_DEBUG but makes troubleshooting NFS issues significantly harder. If unsure, say Y. + +config SUNRPC_XPRT_RDMA_CLIENT + tristate "RPC over RDMA Client Support" + depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS + default SUNRPC && INFINIBAND + help + This option allows the NFS client to support an RDMA-enabled + transport. + + To compile RPC client RDMA transport support as a module, + choose M here: the module will be called xprtrdma. + + If unsure, say N. + +config SUNRPC_XPRT_RDMA_SERVER + tristate "RPC over RDMA Server Support" + depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS + default SUNRPC && INFINIBAND + help + This option allows the NFS server to support an RDMA-enabled + transport. + + To compile RPC server RDMA transport support as a module, + choose M here: the module will be called svcrdma. + + If unsure, say N. diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8209a0411bc..e5a7a1cac8f 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ -obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ + +obj-y += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6c0513a7f99..36e431ee1c9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -108,6 +108,7 @@ struct gss_auth { static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); +static void gss_put_auth(struct gss_auth *gss_auth); static void gss_free_ctx(struct gss_cl_ctx *); static const struct rpc_pipe_ops gss_upcall_ops_v0; @@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); + gss_put_auth(gss_msg->auth); kfree(gss_msg); } @@ -498,9 +500,12 @@ gss_alloc_msg(struct gss_auth *gss_auth, default: err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); if (err) - goto err_free_msg; + goto err_put_pipe_version; }; + kref_get(&gss_auth->kref); return gss_msg; +err_put_pipe_version: + put_pipe_version(gss_auth->net); err_free_msg: kfree(gss_msg); err: @@ -991,6 +996,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; + if (!gssd_running(gss_auth->net)) + goto err_put_mech; auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; @@ -1062,6 +1069,12 @@ gss_free_callback(struct kref *kref) } static void +gss_put_auth(struct gss_auth *gss_auth) +{ + kref_put(&gss_auth->kref, gss_free_callback); +} + +static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth = container_of(auth, @@ -1082,7 +1095,7 @@ gss_destroy(struct rpc_auth *auth) gss_auth->gss_pipe[1] = NULL; rpcauth_destroy_credcache(auth); - kref_put(&gss_auth->kref, gss_free_callback); + gss_put_auth(gss_auth); } /* @@ -1253,7 +1266,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); - kref_put(&gss_auth->kref, gss_free_callback); + gss_put_auth(gss_auth); } static void diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 890a29912d5..3513d559bc4 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -64,7 +64,6 @@ static void xprt_free_allocation(struct rpc_rqst *req) free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); - list_del(&req->rq_bc_pa_list); kfree(req); } @@ -168,8 +167,10 @@ out_free: /* * Memory allocation failed, free the temporary list */ - list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { + list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); + } dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; @@ -198,6 +199,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) xprt_dec_alloc_count(xprt, max_reqs); list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { dprintk("RPC: req=%p\n", req); + list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); if (--max_reqs == 0) break; @@ -210,39 +212,23 @@ out: } EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); -/* - * One or more rpc_rqst structure have been preallocated during the - * backchannel setup. Buffer space for the send and private XDR buffers - * has been preallocated as well. Use xprt_alloc_bc_request to allocate - * to this request. Use xprt_free_bc_request to return it. - * - * We know that we're called in soft interrupt context, grab the spin_lock - * since there is no need to grab the bottom half spin_lock. - * - * Return an available rpc_rqst, otherwise NULL if non are available. - */ -struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { - struct rpc_rqst *req; + struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - spin_lock(&xprt->bc_pa_lock); - if (!list_empty(&xprt->bc_pa_list)) { - req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, - rq_bc_pa_list); - list_del(&req->rq_bc_pa_list); - } else { - req = NULL; - } - spin_unlock(&xprt->bc_pa_lock); + if (list_empty(&xprt->bc_pa_list)) + goto not_found; - if (req != NULL) { - set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); - req->rq_reply_bytes_recvd = 0; - req->rq_bytes_sent = 0; - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + req->rq_reply_bytes_recvd = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); - } + req->rq_xid = xid; + req->rq_connect_cookie = xprt->connect_cookie; +not_found: dprintk("RPC: backchannel req=%p\n", req); return req; } @@ -257,6 +243,7 @@ void xprt_free_bc_request(struct rpc_rqst *req) dprintk("RPC: free backchannel req=%p\n", req); + req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_clear_bit(); WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); @@ -279,7 +266,57 @@ void xprt_free_bc_request(struct rpc_rqst *req) * may be reused by a new callback request. */ spin_lock_bh(&xprt->bc_pa_lock); - list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); spin_unlock_bh(&xprt->bc_pa_lock); } +/* + * One or more rpc_rqst structure have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * to this request. Use xprt_free_bc_request to return it. + * + * We know that we're called in soft interrupt context, grab the spin_lock + * since there is no need to grab the bottom half spin_lock. + * + * Return an available rpc_rqst, otherwise NULL if non are available. + */ +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid) +{ + struct rpc_rqst *req; + + spin_lock(&xprt->bc_pa_lock); + list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { + if (req->rq_connect_cookie != xprt->connect_cookie) + continue; + if (req->rq_xid == xid) + goto found; + } + req = xprt_alloc_bc_request(xprt, xid); +found: + spin_unlock(&xprt->bc_pa_lock); + return req; +} + +/* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after adding enqueing the + * request. + */ +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) +{ + struct rpc_xprt *xprt = req->rq_xprt; + struct svc_serv *bc_serv = xprt->bc_serv; + + req->rq_private_buf.len = copied; + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_del(&req->rq_bc_pa_list); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + wake_up(&bc_serv->sv_cb_waitq); + spin_unlock(&bc_serv->sv_cb_lock); +} + diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0edada97343..2e6ab10734f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -438,6 +438,38 @@ out_no_rpciod: return ERR_PTR(err); } +struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, + struct rpc_xprt *xprt) +{ + struct rpc_clnt *clnt = NULL; + + clnt = rpc_new_client(args, xprt, NULL); + if (IS_ERR(clnt)) + return clnt; + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_DISCRTRY) + clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; + + return clnt; +} +EXPORT_SYMBOL_GPL(rpc_create_xprt); + /** * rpc_create - create an RPC client and transport with one call * @args: rpc_clnt create argument structure @@ -451,7 +483,6 @@ out_no_rpciod: struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; - struct rpc_clnt *clnt; struct xprt_create xprtargs = { .net = args->net, .ident = args->protocol, @@ -515,30 +546,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - clnt = rpc_new_client(args, xprt, NULL); - if (IS_ERR(clnt)) - return clnt; - - if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt); - if (err != 0) { - rpc_shutdown_client(clnt); - return ERR_PTR(err); - } - } - - clnt->cl_softrtry = 1; - if (args->flags & RPC_CLNT_CREATE_HARDRTRY) - clnt->cl_softrtry = 0; - - if (args->flags & RPC_CLNT_CREATE_AUTOBIND) - clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_DISCRTRY) - clnt->cl_discrtry = 1; - if (!(args->flags & RPC_CLNT_CREATE_QUIET)) - clnt->cl_chatty = 1; - - return clnt; + return rpc_create_xprt(args, xprt); } EXPORT_SYMBOL_GPL(rpc_create); @@ -1363,6 +1371,7 @@ rpc_restart_call_prepare(struct rpc_task *task) if (RPC_ASSASSINATED(task)) return 0; task->tk_action = call_start; + task->tk_status = 0; if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; return 1; @@ -1379,6 +1388,7 @@ rpc_restart_call(struct rpc_task *task) if (RPC_ASSASSINATED(task)) return 0; task->tk_action = call_start; + task->tk_status = 0; return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call); @@ -1728,9 +1738,7 @@ call_bind_status(struct rpc_task *task) case -EPROTONOSUPPORT: dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", task->tk_pid); - task->tk_status = 0; - task->tk_action = call_bind; - return; + goto retry_timeout; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: case -ECONNABORTED: @@ -1756,6 +1764,7 @@ call_bind_status(struct rpc_task *task) return; retry_timeout: + task->tk_status = 0; task->tk_action = call_timeout; } @@ -1798,21 +1807,19 @@ call_connect_status(struct rpc_task *task) trace_rpc_connect_status(task, status); task->tk_status = 0; switch (status) { - /* if soft mounted, test if we've timed out */ - case -ETIMEDOUT: - task->tk_action = call_timeout; - return; case -ECONNREFUSED: case -ECONNRESET: case -ECONNABORTED: case -ENETUNREACH: case -EHOSTUNREACH: - /* retry with existing socket, after a delay */ - rpc_delay(task, 3*HZ); if (RPC_IS_SOFTCONN(task)) break; + /* retry with existing socket, after a delay */ + rpc_delay(task, 3*HZ); case -EAGAIN: - task->tk_action = call_bind; + /* Check for timeouts before looping back to call_bind */ + case -ETIMEDOUT: + task->tk_action = call_timeout; return; case 0: clnt->cl_stats->netreconn++; @@ -2007,6 +2014,10 @@ call_status(struct rpc_task *task) case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, status); + break; + } /* * Delay any retries for 3 seconds, then handle as if it * were a timeout. diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff3cc4bf4b2..25578afe154 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -637,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr) static void __rpc_atrun(struct rpc_task *task) { - task->tk_status = 0; + if (task->tk_status == -ETIMEDOUT) + task->tk_status = 0; } /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 80a6640f329..06c6ff0cb91 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -571,7 +571,7 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -int svc_alloc_arg(struct svc_rqst *rqstp) +static int svc_alloc_arg(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg; @@ -612,7 +612,7 @@ int svc_alloc_arg(struct svc_rqst *rqstp) return 0; } -struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) +static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; @@ -691,7 +691,7 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) return xprt; } -void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) +static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) { spin_lock_bh(&serv->sv_lock); set_bit(XPT_TEMP, &newxpt->xpt_flags); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b6e59f0a947..43bcb4699d6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -60,7 +60,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int flags); -static void svc_udp_data_ready(struct sock *, int); +static void svc_udp_data_ready(struct sock *); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); static void svc_sock_detach(struct svc_xprt *); @@ -403,14 +403,14 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, /* * INET callback when data has been received on the socket. */ -static void svc_udp_data_ready(struct sock *sk, int count) +static void svc_udp_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { - dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", - svsk, sk, count, + dprintk("svc: socket %p(inet %p), busy=%d\n", + svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); @@ -731,7 +731,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) * A data_ready event on a listening socket means there's a connection * pending. Do not use state_change as a substitute for it. */ -static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) +static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; wait_queue_head_t *wq; @@ -783,7 +783,7 @@ static void svc_tcp_state_change(struct sock *sk) wake_up_interruptible_all(wq); } -static void svc_tcp_data_ready(struct sock *sk, int count) +static void svc_tcp_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; wait_queue_head_t *wq = sk_sleep(sk); @@ -1397,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } +bool svc_alien_sock(struct net *net, int fd) +{ + int err; + struct socket *sock = sockfd_lookup(fd, &err); + bool ret = false; + + if (!sock) + goto out; + if (sock_net(sock->sk) != net) + ret = true; + sockfd_put(sock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); + /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1504bb11e4f..dd97ba3c445 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -833,8 +833,20 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) } EXPORT_SYMBOL_GPL(xdr_buf_from_iov); -/* Sets subbuf to the portion of buf of length len beginning base bytes - * from the start of buf. Returns -1 if base of length are out of bounds. */ +/** + * xdr_buf_subsegment - set subbuf to a portion of buf + * @buf: an xdr buffer + * @subbuf: the result buffer + * @base: beginning of range in bytes + * @len: length of range in bytes + * + * sets @subbuf to an xdr buffer representing the portion of @buf of + * length @len starting at offset @base. + * + * @buf and @subbuf may be pointers to the same struct xdr_buf. + * + * Returns -1 if base of length are out of bounds. + */ int xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, unsigned int base, unsigned int len) @@ -847,9 +859,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, len -= subbuf->head[0].iov_len; base = 0; } else { - subbuf->head[0].iov_base = NULL; - subbuf->head[0].iov_len = 0; base -= buf->head[0].iov_len; + subbuf->head[0].iov_len = 0; } if (base < buf->page_len) { @@ -871,9 +882,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, len -= subbuf->tail[0].iov_len; base = 0; } else { - subbuf->tail[0].iov_base = NULL; - subbuf->tail[0].iov_len = 0; base -= buf->tail[0].iov_len; + subbuf->tail[0].iov_len = 0; } if (base || len) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7d4df99f761..d173f79947c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1383,15 +1383,3 @@ void xprt_put(struct rpc_xprt *xprt) if (atomic_dec_and_test(&xprt->count)) xprt_destroy(xprt); } - -/** - * xprt_get - return a reference to an RPC transport. - * @xprt: pointer to the transport - * - */ -struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) -{ - if (atomic_inc_not_zero(&xprt->count)) - return xprt; - return NULL; -} diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 5a8f268bdd3..da5136fd569 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -1,8 +1,8 @@ -obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o +obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o xprtrdma-y := transport.o rpc_rdma.o verbs.o -obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o +obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o svcrdma-y := svc_rdma.o svc_rdma_transport.o \ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e03725bfe2b..96ead526b12 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -649,9 +649,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) break; page_base = 0; } - rqst->rq_rcv_buf.page_len = olen - copy_len; - } else - rqst->rq_rcv_buf.page_len = 0; + } if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { curlen = copy_len; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0ce75524ed2..8d904e4eef1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -90,6 +90,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, sge_no++; } rqstp->rq_respages = &rqstp->rq_pages[sge_no]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* We should never run out of SGE because the limit is defined to * support the max allowed RPC data length @@ -169,6 +170,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; byte_count -= sge_bytes; ch_bytes -= sge_bytes; @@ -276,6 +278,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, /* rq_respages points one past arg pages */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* Create the reply and chunk maps */ offset = 0; @@ -520,13 +523,6 @@ next_sge: for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) rqstp->rq_pages[ch_no] = NULL; - /* - * Detach res pages. If svc_release sees any it will attempt to - * put them. - */ - while (rqstp->rq_next_page != rqstp->rq_respages) - *(--rqstp->rq_next_page) = NULL; - return err; } @@ -550,7 +546,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, /* rq_respages starts after the last arg page */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* Rebuild rq_arg head and tail. */ rqstp->rq_arg.head[0] = head->arg.head[0]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c1d124dc772..7e024a51617 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -265,6 +265,7 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, xdr_off -= xdr->head[0].iov_len; if (xdr_off < xdr->page_len) { /* This offset is in the page list */ + xdr_off += xdr->page_base; page = xdr->pages[xdr_off >> PAGE_SHIFT]; xdr_off &= ~PAGE_MASK; } else { @@ -625,6 +626,7 @@ static int send_reply(struct svcxprt_rdma *rdma, if (page_no+1 >= sge_no) ctxt->sge[page_no+1].length = 0; } + rqstp->rq_next_page = rqstp->rq_respages + 1; BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 62e4f9bcc38..25688fa2207 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -477,8 +477,7 @@ struct page *svc_rdma_get_page(void) while ((page = alloc_page(GFP_KERNEL)) == NULL) { /* If we can't get memory, wait a bit and try again */ - printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " - "jiffies.\n"); + printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n"); schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); } return page; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 285dc088411..1eb9c468d0c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -733,7 +733,7 @@ static void __exit xprt_rdma_cleanup(void) { int rc; - dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #ifdef RPC_DEBUG if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); @@ -755,14 +755,14 @@ static int __init xprt_rdma_init(void) if (rc) return rc; - dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); - dprintk(KERN_INFO "Defaults:\n"); - dprintk(KERN_INFO "\tSlots %d\n" + dprintk("Defaults:\n"); + dprintk("\tSlots %d\n" "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", xprt_rdma_slot_table_entries, xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); - dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + dprintk("\tPadding %d\n\tMemreg %d\n", xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); #ifdef RPC_DEBUG diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 817a1e52396..25a3dcf15ca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -254,7 +254,7 @@ struct sock_xprt { /* * Saved socket callback addresses */ - void (*old_data_ready)(struct sock *, int); + void (*old_data_ready)(struct sock *); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); void (*old_error_report)(struct sock *); @@ -510,6 +510,7 @@ static int xs_nospace(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", @@ -527,7 +528,7 @@ static int xs_nospace(struct rpc_task *task) * window size */ set_bit(SOCK_NOSPACE, &transport->sock->flags); - transport->inet->sk_write_pending++; + sk->sk_write_pending++; /* ...and wait for more buffer space */ xprt_wait_for_buffer_space(task, xs_nospace_callback); } @@ -537,6 +538,9 @@ static int xs_nospace(struct rpc_task *task) } spin_unlock_bh(&xprt->transport_lock); + + /* Race breaker in case memory is freed before above code is called */ + sk->sk_write_space(sk); return ret; } @@ -905,6 +909,12 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } +static void xs_xprt_free(struct rpc_xprt *xprt) +{ + xs_free_peer_addresses(xprt); + xprt_free(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -915,8 +925,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); + xs_xprt_free(xprt); module_put(THIS_MODULE); } @@ -942,7 +951,7 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) * * Currently this assumes we can read the whole reply in a single gulp. */ -static void xs_local_data_ready(struct sock *sk, int len) +static void xs_local_data_ready(struct sock *sk) { struct rpc_task *task; struct rpc_xprt *xprt; @@ -1005,7 +1014,7 @@ static void xs_local_data_ready(struct sock *sk, int len) * @len: how much data to read * */ -static void xs_udp_data_ready(struct sock *sk, int len) +static void xs_udp_data_ready(struct sock *sk) { struct rpc_task *task; struct rpc_xprt *xprt; @@ -1302,41 +1311,29 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, * If we're unable to obtain the rpc_rqst we schedule the closing of the * connection and return -1. */ -static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, +static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; - req = xprt_alloc_bc_request(xprt); + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { + spin_unlock(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; } - req->rq_xid = transport->tcp_xid; dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); xs_tcp_read_common(xprt, desc, req); - if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { - struct svc_serv *bc_serv = xprt->bc_serv; - - /* - * Add callback request to callback list. The callback - * service sleeps on the sv_cb_waitq waiting for new - * requests. Wake it up after adding enqueing the - * request. - */ - dprintk("RPC: add callback request to list\n"); - spin_lock(&bc_serv->sv_cb_lock); - list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); - spin_unlock(&bc_serv->sv_cb_lock); - wake_up(&bc_serv->sv_cb_waitq); - } - - req->rq_private_buf.len = transport->tcp_copied; + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) + xprt_complete_bc_request(req, transport->tcp_copied); + spin_unlock(&xprt->transport_lock); return 0; } @@ -1440,7 +1437,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns * @bytes: how much data to read * */ -static void xs_tcp_data_ready(struct sock *sk, int bytes) +static void xs_tcp_data_ready(struct sock *sk) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; @@ -2540,6 +2537,10 @@ static void bc_close(struct rpc_xprt *xprt) static void bc_destroy(struct rpc_xprt *xprt) { + dprintk("RPC: bc_destroy xprt %p\n", xprt); + + xs_xprt_free(xprt); + module_put(THIS_MODULE); } static struct rpc_xprt_ops xs_local_ops = { @@ -2740,7 +2741,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - xprt_free(xprt); + xs_xprt_free(xprt); return ret; } @@ -2818,7 +2819,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - xprt_free(xprt); + xs_xprt_free(xprt); return ret; } @@ -2893,12 +2894,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_PROTO]); - if (try_module_get(THIS_MODULE)) return xprt; ret = ERR_PTR(-EINVAL); out_err: - xprt_free(xprt); + xs_xprt_free(xprt); return ret; } @@ -2915,15 +2915,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct svc_sock *bc_sock; struct rpc_xprt *ret; - if (args->bc_xprt->xpt_bc_xprt) { - /* - * This server connection already has a backchannel - * transport; we can't create a new one, as we wouldn't - * be able to match replies based on xid any more. So, - * reuse the already-existing one: - */ - return args->bc_xprt->xpt_bc_xprt; - } xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) @@ -2981,13 +2972,14 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) */ xprt_set_connected(xprt); - if (try_module_get(THIS_MODULE)) return xprt; + + args->bc_xprt->xpt_bc_xprt = NULL; xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: - xprt_free(xprt); + xs_xprt_free(xprt); return ret; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 60b00ab93d7..a74acf9ee80 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,6 +37,8 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H +#include "core.h" + #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index bf860d9e75a..95ab5ef9292 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -41,9 +41,9 @@ #include "bcast.h" #include "name_distr.h" -#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ - -#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ +#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#define BCBEARER MAX_BEARERS /** * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link @@ -356,9 +356,9 @@ static void bclink_peek_nack(struct tipc_msg *msg) } /* - * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster + * tipc_bclink_xmit - broadcast a packet to all nodes in cluster */ -int tipc_bclink_send_msg(struct sk_buff *buf) +int tipc_bclink_xmit(struct sk_buff *buf) { int res; @@ -370,7 +370,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf) goto exit; } - res = tipc_link_send_buf(bcl, buf); + res = __tipc_link_xmit(bcl, buf); if (likely(res >= 0)) { bclink_set_last_sent(); bcl->stats.queue_sz_counts++; @@ -399,19 +399,18 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) */ if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_send_proto_msg( - node->active_links[node->addr & 1], - STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(node->active_links[node->addr & 1], + STATE_MSG, 0, 0, 0, 0, 0); bcl->stats.sent_acks++; } } /** - * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards + * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards * * tipc_net_lock is read_locked, no other locks set */ -void tipc_bclink_recv_pkt(struct sk_buff *buf) +void tipc_bclink_rcv(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; @@ -468,7 +467,7 @@ receive: spin_unlock_bh(&bc_lock); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_port_recv_mcast(buf, NULL); + tipc_port_mcast_rcv(buf, NULL); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { @@ -478,12 +477,12 @@ receive: bcl->stats.recv_bundled += msg_msgcnt(msg); spin_unlock_bh(&bc_lock); tipc_node_unlock(node); - tipc_link_recv_bundle(buf); + tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { int ret; - ret = tipc_link_recv_fragment(&node->bclink.reasm_head, - &node->bclink.reasm_tail, - &buf); + ret = tipc_link_frag_rcv(&node->bclink.reasm_head, + &node->bclink.reasm_tail, + &buf); if (ret == LINK_REASM_ERROR) goto unlock; spin_lock_bh(&bc_lock); @@ -503,7 +502,7 @@ receive: bclink_accept_pkt(node, seqno); spin_unlock_bh(&bc_lock); tipc_node_unlock(node); - tipc_named_recv(buf); + tipc_named_rcv(buf); } else { spin_lock_bh(&bc_lock); bclink_accept_pkt(node, seqno); @@ -669,9 +668,8 @@ void tipc_bcbearer_sort(void) memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = &tipc_bearers[b_index]; - - if (!b->active || !b->nodes.count) + struct tipc_bearer *b = bearer_list[b_index]; + if (!b || !b->nodes.count) continue; if (!bp_temp[b->priority].primary) @@ -785,8 +783,8 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - spin_lock_init(&bcbearer->bearer.lock); bcl->b_ptr = &bcbearer->bearer; + bearer_list[BCBEARER] = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); } @@ -797,6 +795,7 @@ void tipc_bclink_stop(void) tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); + bearer_list[BCBEARER] = NULL; memset(bclink, 0, sizeof(*bclink)); memset(bcbearer, 0, sizeof(*bcbearer)); } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 6ee587b469f..a80ef54b818 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -90,8 +90,8 @@ void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -int tipc_bclink_send_msg(struct sk_buff *buf); -void tipc_bclink_recv_pkt(struct sk_buff *buf); +int tipc_bclink_xmit(struct sk_buff *buf); +void tipc_bclink_rcv(struct sk_buff *buf); u32 tipc_bclink_get_last_sent(void); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a38c89969c6..3fef7eb776d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,9 +49,9 @@ static struct tipc_media * const media_info_array[] = { NULL }; -struct tipc_bearer tipc_bearers[MAX_BEARERS]; +struct tipc_bearer *bearer_list[MAX_BEARERS + 1]; -static void bearer_disable(struct tipc_bearer *b_ptr); +static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -177,8 +177,9 @@ struct tipc_bearer *tipc_bearer_find(const char *name) struct tipc_bearer *b_ptr; u32 i; - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (b_ptr->active && (!strcmp(b_ptr->name, name))) + for (i = 0; i < MAX_BEARERS; i++) { + b_ptr = bearer_list[i]; + if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } return NULL; @@ -200,8 +201,10 @@ struct sk_buff *tipc_bearer_get_names(void) read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = &tipc_bearers[j]; - if (b->active && (b->media == media_info_array[i])) { + b = bearer_list[j]; + if (!b) + continue; + if (b->media == media_info_array[i]) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, b->name, strlen(b->name) + 1); @@ -284,16 +287,17 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - if (!tipc_bearers[i].active) { + b_ptr = bearer_list[i]; + if (!b_ptr) { bearer_id = i; continue; } - if (!strcmp(name, tipc_bearers[i].name)) { + if (!strcmp(name, b_ptr->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); goto exit; } - if ((tipc_bearers[i].priority == priority) && + if ((b_ptr->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", @@ -311,7 +315,11 @@ restart: goto exit; } - b_ptr = &tipc_bearers[bearer_id]; + b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); + if (!b_ptr) { + res = -ENOMEM; + goto exit; + } strcpy(b_ptr->name, name); b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); @@ -324,19 +332,20 @@ restart: b_ptr->identity = bearer_id; b_ptr->tolerance = m_ptr->tolerance; b_ptr->window = m_ptr->window; + b_ptr->domain = disc_domain; b_ptr->net_plane = bearer_id + 'A'; - b_ptr->active = 1; b_ptr->priority = priority; - INIT_LIST_HEAD(&b_ptr->links); - spin_lock_init(&b_ptr->lock); - res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); + res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(b_ptr); + bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); goto exit; } + + bearer_list[bearer_id] = b_ptr; + pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); @@ -350,20 +359,11 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); - spin_lock_bh(&b_ptr->lock); - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - struct tipc_node *n_ptr = l_ptr->owner; - - spin_lock_bh(&n_ptr->lock); - tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); - } - spin_unlock_bh(&b_ptr->lock); + tipc_disc_delete(b_ptr->link_req); + tipc_link_reset_list(b_ptr->identity); + tipc_disc_create(b_ptr, &b_ptr->bcast_addr); read_unlock_bh(&tipc_net_lock); return 0; } @@ -373,26 +373,24 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds tipc_net_lock. */ -static void bearer_disable(struct tipc_bearer *b_ptr) +static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; - struct tipc_link_req *temp_req; + u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); - spin_lock_bh(&b_ptr->lock); b_ptr->media->disable_media(b_ptr); - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - tipc_link_delete(l_ptr); - } - temp_req = b_ptr->link_req; - b_ptr->link_req = NULL; - spin_unlock_bh(&b_ptr->lock); - if (temp_req) - tipc_disc_delete(temp_req); + tipc_link_delete_list(b_ptr->identity, shutting_down); + if (b_ptr->link_req) + tipc_disc_delete(b_ptr->link_req); - memset(b_ptr, 0, sizeof(struct tipc_bearer)); + for (i = 0; i < MAX_BEARERS; i++) { + if (b_ptr == bearer_list[i]) { + bearer_list[i] = NULL; + break; + } + } + kfree(b_ptr); } int tipc_disable_bearer(const char *name) @@ -406,7 +404,7 @@ int tipc_disable_bearer(const char *name) pr_warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; } else { - bearer_disable(b_ptr); + bearer_disable(b_ptr, false); res = 0; } write_unlock_bh(&tipc_net_lock); @@ -585,7 +583,11 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_DOWN: case NETDEV_CHANGEMTU: + tipc_reset_bearer(b_ptr); + break; case NETDEV_CHANGEADDR: + tipc_l2_media_addr_set(b_ptr, &b_ptr->addr, + (char *)dev->dev_addr); tipc_reset_bearer(b_ptr); break; case NETDEV_UNREGISTER: @@ -599,7 +601,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, } static struct packet_type tipc_packet_type __read_mostly = { - .type = __constant_htons(ETH_P_TIPC), + .type = htons(ETH_P_TIPC), .func = tipc_l2_rcv_msg, }; @@ -610,8 +612,13 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { + int err; + + err = register_netdevice_notifier(¬ifier); + if (err) + return err; dev_add_pack(&tipc_packet_type); - return register_netdevice_notifier(¬ifier); + return 0; } void tipc_bearer_cleanup(void) @@ -622,10 +629,14 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(void) { + struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - if (tipc_bearers[i].active) - bearer_disable(&tipc_bearers[i]); + b_ptr = bearer_list[i]; + if (b_ptr) { + bearer_disable(b_ptr, true); + bearer_list[i] = NULL; + } } } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 4f5db9ad5bf..ba48145e871 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -107,10 +107,8 @@ struct tipc_media { /** * struct tipc_bearer - Generic TIPC bearer structure - * @dev: ptr to associated network device - * @usr_handle: pointer to additional media-specific information about bearer + * @media_ptr: pointer to additional media-specific information about bearer * @mtu: max packet size bearer can support - * @lock: spinlock for controlling access to bearer * @addr: media-specific address associated with bearer * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer @@ -118,10 +116,9 @@ struct tipc_media { * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer + * @domain: network domain to which links can be established * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests - * @links: list of non-congested links associated with bearer - * @active: non-zero if bearer structure is represents a bearer * @net_plane: network plane ('A' through 'H') currently associated with bearer * @nodes: indicates which nodes in cluster can be reached through bearer * @@ -134,16 +131,14 @@ struct tipc_bearer { u32 mtu; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; - spinlock_t lock; struct tipc_media *media; struct tipc_media_addr bcast_addr; u32 priority; u32 window; u32 tolerance; + u32 domain; u32 identity; struct tipc_link_req *link_req; - struct list_head links; - int active; char net_plane; struct tipc_node_map nodes; }; @@ -155,7 +150,7 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer tipc_bearers[]; +extern struct tipc_bearer *bearer_list[]; /* * TIPC routines available to supported media types diff --git a/net/tipc/config.c b/net/tipc/config.c index c301a9a592d..4b981c05382 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -43,13 +43,11 @@ #define REPLY_TRUNCATED "<truncated>\n" static DEFINE_MUTEX(config_mutex); -static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ - struct sk_buff *tipc_cfg_reply_alloc(int payload_size) { struct sk_buff *buf; @@ -181,19 +179,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_core_start_net(addr); - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_remote_mng(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - tipc_remote_management = (value != 0); + tipc_net_start(addr); return tipc_cfg_reply_none(); } @@ -247,21 +233,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Check command authorization */ if (likely(in_own_node(orig_node))) { /* command is permitted */ - } else if (cmd >= 0x8000) { + } else { rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot be done remotely)"); goto exit; - } else if (!tipc_remote_management) { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE); - goto exit; - } else if (cmd >= 0x4000) { - u32 domain = 0; - - if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) || - (domain != orig_node)) { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_ZONE_MSTR); - goto exit; - } } /* Call appropriate processing routine */ @@ -310,18 +285,12 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(); break; - case TIPC_CMD_SET_REMOTE_MNG: - rep_tlv_buf = cfg_set_remote_mng(); - break; case TIPC_CMD_SET_MAX_PORTS: rep_tlv_buf = cfg_set_max_ports(); break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; - case TIPC_CMD_GET_REMOTE_MNG: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_remote_management); - break; case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); break; @@ -345,6 +314,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_PUBL: case TIPC_CMD_GET_MAX_PUBL: case TIPC_CMD_SET_LOG_SIZE: + case TIPC_CMD_SET_REMOTE_MNG: + case TIPC_CMD_GET_REMOTE_MNG: case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); @@ -369,80 +340,3 @@ exit: mutex_unlock(&config_mutex); return rep_tlv_buf; } - -static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) -{ - struct tipc_cfg_msg_hdr *req_hdr; - struct tipc_cfg_msg_hdr *rep_hdr; - struct sk_buff *rep_buf; - int ret; - - /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)buf; - if ((len < sizeof(*req_hdr)) || - (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || - (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { - pr_warn("Invalid configuration message discarded\n"); - return; - } - - /* Generate reply for request (if can't, return request) */ - rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), - buf + sizeof(*req_hdr), - len - sizeof(*req_hdr), - BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); - if (rep_buf) { - skb_push(rep_buf, sizeof(*rep_hdr)); - rep_hdr = (struct tipc_cfg_msg_hdr *)rep_buf->data; - memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); - rep_hdr->tcm_len = htonl(rep_buf->len); - rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - - ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, - rep_buf->len); - if (ret < 0) - pr_err("Sending cfg reply message failed, no memory\n"); - - kfree_skb(rep_buf); - } -} - -static struct sockaddr_tipc cfgsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_CFG_SRV, - .addr.nameseq.lower = 0, - .addr.nameseq.upper = 0, - .scope = TIPC_ZONE_SCOPE -}; - -static struct tipc_server cfgsrv __read_mostly = { - .saddr = &cfgsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_RDM, - .max_rcvbuf_size = 64 * 1024, - .name = "cfg_server", - .tipc_conn_recvmsg = cfg_conn_msg_event, - .tipc_conn_new = NULL, - .tipc_conn_shutdown = NULL -}; - -int tipc_cfg_init(void) -{ - return tipc_server_start(&cfgsrv); -} - -void tipc_cfg_reinit(void) -{ - tipc_server_stop(&cfgsrv); - - cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; - cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; - tipc_server_start(&cfgsrv); -} - -void tipc_cfg_stop(void) -{ - tipc_server_stop(&cfgsrv); -} diff --git a/net/tipc/config.h b/net/tipc/config.h index 1f252f3fa05..47b1bf18161 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -64,9 +64,4 @@ static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *req_tlv_area, int req_tlv_space, int headroom); - -int tipc_cfg_init(void); -void tipc_cfg_reinit(void); -void tipc_cfg_stop(void); - #endif diff --git a/net/tipc/core.c b/net/tipc/core.c index f9e88d8b04c..50d57429ebc 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -1,7 +1,7 @@ /* * net/tipc/core.c: TIPC module code * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2013, Ericsson AB * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * @@ -50,7 +50,6 @@ int tipc_random __read_mostly; u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; -int tipc_remote_management __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ /** @@ -77,39 +76,14 @@ struct sk_buff *tipc_buf_acquire(u32 size) } /** - * tipc_core_stop_net - shut down TIPC networking sub-systems - */ -static void tipc_core_stop_net(void) -{ - tipc_net_stop(); - tipc_bearer_cleanup(); -} - -/** - * start_net - start TIPC networking sub-systems - */ -int tipc_core_start_net(unsigned long addr) -{ - int res; - - tipc_net_start(addr); - res = tipc_bearer_setup(); - if (res < 0) - goto err; - return res; - -err: - tipc_core_stop_net(); - return res; -} - -/** * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode */ static void tipc_core_stop(void) { + tipc_handler_stop(); + tipc_net_stop(); + tipc_bearer_cleanup(); tipc_netlink_stop(); - tipc_cfg_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); tipc_ref_table_stop(); @@ -122,30 +96,59 @@ static void tipc_core_stop(void) */ static int tipc_core_start(void) { - int res; + int err; get_random_bytes(&tipc_random, sizeof(tipc_random)); - res = tipc_handler_start(); - if (!res) - res = tipc_ref_table_init(tipc_max_ports, tipc_random); - if (!res) - res = tipc_nametbl_init(); - if (!res) - res = tipc_netlink_start(); - if (!res) - res = tipc_socket_init(); - if (!res) - res = tipc_register_sysctl(); - if (!res) - res = tipc_subscr_start(); - if (!res) - res = tipc_cfg_init(); - if (res) { - tipc_handler_stop(); - tipc_core_stop(); - } - return res; + err = tipc_handler_start(); + if (err) + goto out_handler; + + err = tipc_ref_table_init(tipc_max_ports, tipc_random); + if (err) + goto out_reftbl; + + err = tipc_nametbl_init(); + if (err) + goto out_nametbl; + + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_socket_init(); + if (err) + goto out_socket; + + err = tipc_register_sysctl(); + if (err) + goto out_sysctl; + + err = tipc_subscr_start(); + if (err) + goto out_subscr; + + err = tipc_bearer_setup(); + if (err) + goto out_bearer; + + return 0; +out_bearer: + tipc_subscr_stop(); +out_subscr: + tipc_unregister_sysctl(); +out_sysctl: + tipc_socket_stop(); +out_socket: + tipc_netlink_stop(); +out_netlink: + tipc_nametbl_stop(); +out_nametbl: + tipc_ref_table_stop(); +out_reftbl: + tipc_handler_stop(); +out_handler: + return err; } static int __init tipc_init(void) @@ -155,7 +158,6 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); tipc_own_addr = 0; - tipc_remote_management = 1; tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; @@ -174,8 +176,6 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { - tipc_handler_stop(); - tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 1ff477b0450..8985bbcb942 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -79,7 +79,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; -extern int tipc_remote_management __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; /* @@ -90,7 +89,6 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -int tipc_core_start_net(unsigned long); int tipc_handler_start(void); void tipc_handler_stop(void); int tipc_netlink_start(void); @@ -192,6 +190,7 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; + bool deferred; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 412ff41b861..542fe3413dc 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -48,7 +48,6 @@ * struct tipc_link_req - information about an ongoing link setup request * @bearer: bearer issuing requests * @dest: destination address for request messages - * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent @@ -58,7 +57,6 @@ struct tipc_link_req { struct tipc_bearer *bearer; struct tipc_media_addr dest; - u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; @@ -69,14 +67,13 @@ struct tipc_link_req { /** * tipc_disc_init_msg - initialize a link setup message * @type: message type (request or response) - * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, - struct tipc_bearer *b_ptr) +static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; + u32 dest_domain = b_ptr->domain; if (buf) { msg = buf_msg(buf); @@ -110,11 +107,11 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, } /** - * tipc_disc_recv_msg - handle incoming link setup message (request or response) + * tipc_disc_rcv - handle incoming link setup message (request or response) * @buf: buffer containing message * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) +void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) { struct tipc_node *n_ptr; struct tipc_link *link; @@ -149,7 +146,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) } if (!tipc_in_scope(dest, tipc_own_addr)) return; - if (!tipc_in_scope(b_ptr->link_req->domain, orig)) + if (!tipc_in_scope(b_ptr->domain, orig)) return; /* Locate structure corresponding to requesting node */ @@ -242,7 +239,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) link_fully_up = link_working_working(link); if ((type == DSC_REQ_MSG) && !link_fully_up) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); + rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); if (rbuf) { tipc_bearer_send(b_ptr, rbuf, &media_addr); kfree_skb(rbuf); @@ -306,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req) spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->domain) && req->num_nodes) { + if (tipc_node(req->bearer->domain) && req->num_nodes) { req->timer_intv = TIPC_LINK_REQ_INACTIVE; goto exit; } @@ -342,8 +339,7 @@ exit: * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, - u32 dest_domain) +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -351,7 +347,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, if (!req) return -ENOMEM; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr); + req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr); if (!req->buf) { kfree(req); return -ENOMSG; @@ -359,7 +355,6 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, memcpy(&req->dest, dest, sizeof(*dest)); req->bearer = b_ptr; - req->domain = dest_domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 75b67c403aa..07f34729459 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -39,11 +39,10 @@ struct tipc_link_req; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, - u32 dest_domain); +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr); +void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e4bc8a29674..1fabf160501 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) spin_lock_bh(&qitem_lock); if (!handler_enabled) { - pr_err("Signal request ignored by handler\n"); spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } diff --git a/net/tipc/link.c b/net/tipc/link.c index d4b5de41b68..c5190ab7529 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -77,19 +77,19 @@ static const char *link_unk_evt = "Unknown link event "; static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); +static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int link_send_sections_long(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); +static int tipc_link_iovec_long_xmit(struct tipc_port *sender, + struct iovec const *msg_sect, + unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); -static void tipc_link_send_sync(struct tipc_link *l); -static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf); +static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); +static void tipc_link_sync_xmit(struct tipc_link *l); +static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); /* * Simple link routines @@ -147,11 +147,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr) /** * link_timeout - handle expiration of link timer * @l_ptr: pointer to link - * - * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict - * with tipc_link_delete(). (There is no risk that the node will be deleted by - * another thread because tipc_link_delete() always cancels the link timer before - * tipc_node_delete() is called.) */ static void link_timeout(struct tipc_link *l_ptr) { @@ -213,8 +208,8 @@ static void link_set_timer(struct tipc_link *l_ptr, u32 time) * Returns pointer to link. */ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr) + struct tipc_bearer *b_ptr, + const struct tipc_media_addr *media_addr) { struct tipc_link *l_ptr; struct tipc_msg *msg; @@ -279,41 +274,44 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); - list_add_tail(&l_ptr->link_list, &b_ptr->links); link_state_event(l_ptr, STARTING_EVT); return l_ptr; } -/** - * tipc_link_delete - delete a link - * @l_ptr: pointer to link - * - * Note: 'tipc_net_lock' is write_locked, bearer is locked. - * This routine must not grab the node lock until after link timer cancellation - * to avoid a potential deadlock situation. - */ -void tipc_link_delete(struct tipc_link *l_ptr) +void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) { - if (!l_ptr) { - pr_err("Attempt to delete non-existent link\n"); - return; - } - - k_cancel_timer(&l_ptr->timer); + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; - tipc_node_lock(l_ptr->owner); - tipc_link_reset(l_ptr); - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_purge_queues(l_ptr); - list_del_init(&l_ptr->link_list); - tipc_node_unlock(l_ptr->owner); - k_term_timer(&l_ptr->timer); - kfree(l_ptr); + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + spin_lock_bh(&n_ptr->lock); + l_ptr = n_ptr->links[bearer_id]; + if (l_ptr) { + tipc_link_reset(l_ptr); + if (shutting_down || !tipc_node_is_up(n_ptr)) { + tipc_node_detach_link(l_ptr->owner, l_ptr); + tipc_link_reset_fragments(l_ptr); + spin_unlock_bh(&n_ptr->lock); + + /* Nobody else can access this link now: */ + del_timer_sync(&l_ptr->timer); + kfree(l_ptr); + } else { + /* Detach/delete when failover is finished: */ + l_ptr->flags |= LINK_STOPPED; + spin_unlock_bh(&n_ptr->lock); + del_timer_sync(&l_ptr->timer); + } + continue; + } + spin_unlock_bh(&n_ptr->lock); + } + rcu_read_unlock(); } - /** * link_schedule_port - schedule port for deferred sending * @l_ptr: pointer to link @@ -330,8 +328,6 @@ static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); if (p_ptr) { - if (!p_ptr->wakeup) - goto exit; if (!list_empty(&p_ptr->wait_list)) goto exit; p_ptr->congested = 1; @@ -366,7 +362,7 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) list_del_init(&p_ptr->wait_list); spin_lock_bh(p_ptr->lock); p_ptr->congested = 0; - p_ptr->wakeup(p_ptr); + tipc_port_wakeup(p_ptr); win -= p_ptr->waiting_pkts; spin_unlock_bh(p_ptr->lock); } @@ -461,6 +457,21 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } +void tipc_link_reset_list(unsigned int bearer_id) +{ + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + spin_lock_bh(&n_ptr->lock); + l_ptr = n_ptr->links[bearer_id]; + if (l_ptr) + tipc_link_reset(l_ptr); + spin_unlock_bh(&n_ptr->lock); + } + rcu_read_unlock(); +} static void link_activate(struct tipc_link *l_ptr) { @@ -479,7 +490,10 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) struct tipc_link *other; u32 cont_intv = l_ptr->continuity_interval; - if (!l_ptr->started && (event != STARTING_EVT)) + if (l_ptr->flags & LINK_STOPPED) + return; + + if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. */ /* Check whether changeover is going on */ @@ -499,12 +513,12 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (l_ptr->next_in_no != l_ptr->checkpoint) { l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); @@ -512,7 +526,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } l_ptr->state = WORKING_UNKNOWN; l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); break; @@ -522,7 +536,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -544,7 +559,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -554,14 +570,14 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->fsm_msg_cnt = 0; l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ @@ -570,8 +586,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, RESET_MSG, - 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, RESET_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); } @@ -591,24 +607,25 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) - tipc_link_send_sync(l_ptr); + tipc_link_sync_xmit(l_ptr); link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; case STARTING_EVT: - l_ptr->started = 1; + l_ptr->flags |= LINK_STARTED; /* fall through */ case TIMEOUT_EVT: - tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -626,16 +643,17 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) - tipc_link_send_sync(l_ptr); + tipc_link_sync_xmit(l_ptr); link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: break; case TIMEOUT_EVT: - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -721,11 +739,11 @@ static void link_add_chain_to_outqueue(struct tipc_link *l_ptr, } /* - * tipc_link_send_buf() is the 'full path' for messages, called from - * inside TIPC when the 'fast path' in tipc_send_buf + * tipc_link_xmit() is the 'full path' for messages, called from + * inside TIPC when the 'fast path' in tipc_send_xmit * has failed, and from link_send() */ -int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) +int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); u32 size = msg_size(msg); @@ -753,7 +771,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) /* Fragmentation needed ? */ if (size > max_packet) - return link_send_long_buf(l_ptr, buf); + return tipc_link_frag_xmit(l_ptr, buf); /* Packet can be queued or sent. */ if (likely(!link_congested(l_ptr))) { @@ -797,11 +815,11 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) } /* - * tipc_link_send(): same as tipc_link_send_buf(), but the link to use has - * not been selected yet, and the the owner node is not locked + * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use + * has not been selected yet, and the the owner node is not locked * Called by TIPC internal users, e.g. the name distributor */ -int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) +int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) { struct tipc_link *l_ptr; struct tipc_node *n_ptr; @@ -813,7 +831,7 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; if (l_ptr) - res = tipc_link_send_buf(l_ptr, buf); + res = __tipc_link_xmit(l_ptr, buf); else kfree_skb(buf); tipc_node_unlock(n_ptr); @@ -825,14 +843,14 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) } /* - * tipc_link_send_sync - synchronize broadcast link endpoints. + * tipc_link_sync_xmit - synchronize broadcast link endpoints. * * Give a newly added peer node the sequence number where it should * start receiving and acking broadcast packets. * * Called with node locked */ -static void tipc_link_send_sync(struct tipc_link *l) +static void tipc_link_sync_xmit(struct tipc_link *l) { struct sk_buff *buf; struct tipc_msg *msg; @@ -849,14 +867,14 @@ static void tipc_link_send_sync(struct tipc_link *l) } /* - * tipc_link_recv_sync - synchronize broadcast link endpoints. + * tipc_link_sync_rcv - synchronize broadcast link endpoints. * Receive the sequence number where we should start receiving and * acking broadcast packets from a newly added peer node, and open * up for reception of such packets. * * Called with node locked */ -static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf) +static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -866,7 +884,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf) } /* - * tipc_link_send_names - send name table entries to new neighbor + * tipc_link_names_xmit - send name table entries to new neighbor * * Send routine for bulk delivery of name table messages when contact * with a new neighbor occurs. No link congestion checking is performed @@ -874,7 +892,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf) * small enough not to require fragmentation. * Called without any locks held. */ -void tipc_link_send_names(struct list_head *message_list, u32 dest) +void tipc_link_names_xmit(struct list_head *message_list, u32 dest) { struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -909,13 +927,13 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest) } /* - * link_send_buf_fast: Entry for data messages where the + * tipc_link_xmit_fast: Entry for data messages where the * destination link is known and the header is complete, * inclusive total message length. Very time critical. * Link is locked. Returns user data length. */ -static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) +static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, + u32 *used_max_pkt) { struct tipc_msg *msg = buf_msg(buf); int res = msg_data_sz(msg); @@ -931,18 +949,18 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, else *used_max_pkt = l_ptr->max_pkt; } - return tipc_link_send_buf(l_ptr, buf); /* All other cases */ + return __tipc_link_xmit(l_ptr, buf); /* All other cases */ } /* - * tipc_link_send_sections_fast: Entry for messages where the + * tipc_link_iovec_xmit_fast: Entry for messages where the * destination processor is known and the header is complete, * except for total message length. * Returns user data length or errno. */ -int tipc_link_send_sections_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) +int tipc_link_iovec_xmit_fast(struct tipc_port *sender, + struct iovec const *msg_sect, + unsigned int len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; struct tipc_link *l_ptr; @@ -968,8 +986,8 @@ again: l_ptr = node->active_links[selector]; if (likely(l_ptr)) { if (likely(buf)) { - res = link_send_buf_fast(l_ptr, buf, - &sender->max_pkt); + res = tipc_link_xmit_fast(l_ptr, buf, + &sender->max_pkt); exit: tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); @@ -995,24 +1013,21 @@ exit: if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) goto again; - return link_send_sections_long(sender, msg_sect, len, - destaddr); + return tipc_link_iovec_long_xmit(sender, msg_sect, + len, destaddr); } tipc_node_unlock(node); } read_unlock_bh(&tipc_net_lock); /* Couldn't find a link to the destination node */ - if (buf) - return tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - if (res >= 0) - return tipc_port_reject_sections(sender, hdr, msg_sect, - len, TIPC_ERR_NO_NODE); - return res; + kfree_skb(buf); + tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); + return -ENETUNREACH; } /* - * link_send_sections_long(): Entry for long messages where the + * tipc_link_iovec_long_xmit(): Entry for long messages where the * destination node is known and the header is complete, * inclusive total message length. * Link and bearer congestion status have been checked to be ok, @@ -1025,9 +1040,9 @@ exit: * * Returns user data length or errno. */ -static int link_send_sections_long(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) +static int tipc_link_iovec_long_xmit(struct tipc_port *sender, + struct iovec const *msg_sect, + unsigned int len, u32 destaddr) { struct tipc_link *l_ptr; struct tipc_node *node; @@ -1146,8 +1161,9 @@ error: } else { reject: kfree_skb_list(buf_chain); - return tipc_port_reject_sections(sender, hdr, msg_sect, - len, TIPC_ERR_NO_NODE); + tipc_port_iovec_reject(sender, hdr, msg_sect, len, + TIPC_ERR_NO_NODE); + return -ENETUNREACH; } /* Append chain of fragments to send queue & send them */ @@ -1391,6 +1407,12 @@ static int link_recv_buf_validate(struct sk_buff *buf) u32 hdr_size; u32 min_hdr_size; + /* If this packet comes from the defer queue, the skb has already + * been validated + */ + if (unlikely(TIPC_SKB_CB(buf)->deferred)) + return 1; + if (unlikely(buf->len < MIN_H_SIZE)) return 0; @@ -1435,15 +1457,10 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) u32 seq_no; u32 ackd; u32 released = 0; - int type; head = head->next; buf->next = NULL; - /* Ensure bearer is still enabled */ - if (unlikely(!b_ptr->active)) - goto discard; - /* Ensure message is well-formed */ if (unlikely(!link_recv_buf_validate(buf))) goto discard; @@ -1457,9 +1474,9 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) - tipc_disc_recv_msg(buf, b_ptr); + tipc_disc_rcv(buf, b_ptr); else - tipc_bclink_recv_pkt(buf); + tipc_bclink_rcv(buf); continue; } @@ -1483,7 +1500,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if ((n_ptr->block_setup & WAIT_PEER_DOWN) && msg_user(msg) == LINK_PROTOCOL && (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && + msg_type(msg) == ACTIVATE_MSG) && !msg_redundant_link(msg)) n_ptr->block_setup &= ~WAIT_PEER_DOWN; @@ -1502,7 +1519,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) while ((crs != l_ptr->next_out) && less_eq(buf_seqno(crs), ackd)) { struct sk_buff *next = crs->next; - kfree_skb(crs); crs = next; released++; @@ -1515,18 +1531,19 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Try sending any messages link endpoint has pending */ if (unlikely(l_ptr->next_out)) tipc_link_push_queue(l_ptr); + if (unlikely(!list_empty(&l_ptr->waiting_ports))) tipc_link_wakeup_ports(l_ptr, 0); + if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - /* Now (finally!) process the incoming message */ -protocol_check: + /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { - link_recv_proto_msg(l_ptr, buf); + tipc_link_proto_rcv(l_ptr, buf); head = link_insert_deferred_queue(l_ptr, head); tipc_node_unlock(n_ptr); continue; @@ -1555,67 +1572,65 @@ protocol_check: l_ptr->next_in_no++; if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); -deliver: - if (likely(msg_isdata(msg))) { - tipc_node_unlock(n_ptr); - tipc_port_recv_msg(buf); - continue; + + /* Deliver packet/message to correct user: */ + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) { + if (!tipc_link_tunnel_rcv(n_ptr, &buf)) { + tipc_node_unlock(n_ptr); + continue; + } + msg = buf_msg(buf); + } else if (msg_user(msg) == MSG_FRAGMENTER) { + int rc; + + l_ptr->stats.recv_fragments++; + rc = tipc_link_frag_rcv(&l_ptr->reasm_head, + &l_ptr->reasm_tail, + &buf); + if (rc == LINK_REASM_COMPLETE) { + l_ptr->stats.recv_fragmented++; + msg = buf_msg(buf); + } else { + if (rc == LINK_REASM_ERROR) + tipc_link_reset(l_ptr); + tipc_node_unlock(n_ptr); + continue; + } } + switch (msg_user(msg)) { - int ret; + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + tipc_node_unlock(n_ptr); + tipc_port_rcv(buf); + continue; case MSG_BUNDLER: l_ptr->stats.recv_bundles++; l_ptr->stats.recv_bundled += msg_msgcnt(msg); tipc_node_unlock(n_ptr); - tipc_link_recv_bundle(buf); + tipc_link_bundle_rcv(buf); continue; case NAME_DISTRIBUTOR: n_ptr->bclink.recv_permitted = true; tipc_node_unlock(n_ptr); - tipc_named_recv(buf); - continue; - case BCAST_PROTOCOL: - tipc_link_recv_sync(n_ptr, buf); - tipc_node_unlock(n_ptr); + tipc_named_rcv(buf); continue; case CONN_MANAGER: tipc_node_unlock(n_ptr); - tipc_port_recv_proto_msg(buf); - continue; - case MSG_FRAGMENTER: - l_ptr->stats.recv_fragments++; - ret = tipc_link_recv_fragment(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); - if (ret == LINK_REASM_COMPLETE) { - l_ptr->stats.recv_fragmented++; - msg = buf_msg(buf); - goto deliver; - } - if (ret == LINK_REASM_ERROR) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); + tipc_port_proto_rcv(buf); continue; - case CHANGEOVER_PROTOCOL: - type = msg_type(msg); - if (tipc_link_tunnel_rcv(&l_ptr, &buf)) { - msg = buf_msg(buf); - seq_no = msg_seqno(msg); - if (type == ORIGINAL_MSG) - goto deliver; - goto protocol_check; - } + case BCAST_PROTOCOL: + tipc_link_sync_rcv(n_ptr, buf); break; default: kfree_skb(buf); - buf = NULL; break; } tipc_node_unlock(n_ptr); - tipc_net_route_msg(buf); continue; unlock_discard: - tipc_node_unlock(n_ptr); discard: kfree_skb(buf); @@ -1682,7 +1697,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, u32 seq_no = buf_seqno(buf); if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - link_recv_proto_msg(l_ptr, buf); + tipc_link_proto_rcv(l_ptr, buf); return; } @@ -1703,8 +1718,9 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, &l_ptr->newest_deferred_in, buf)) { l_ptr->deferred_inqueue_sz++; l_ptr->stats.deferred_recv++; + TIPC_SKB_CB(buf)->deferred = true; if ((l_ptr->deferred_inqueue_sz % 16) == 1) - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } else l_ptr->stats.duplicates++; } @@ -1712,9 +1728,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, /* * Send protocol message to the other endpoint. */ -void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) +void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, + u32 gap, u32 tolerance, u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1813,7 +1828,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, * Note that network plane id propagates through the network, and may * change at any time. The node with lowest address rules */ -static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 rec_gap = 0; u32 max_pkt_info; @@ -1932,8 +1947,8 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, rec_gap, 0, 0, max_pkt_ack); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0, + 0, max_pkt_ack); } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; @@ -1972,7 +1987,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, } skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length); - tipc_link_send_buf(tunnel, buf); + __tipc_link_xmit(tunnel, buf); } @@ -2005,7 +2020,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (buf) { skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); msg_set_size(&tunnel_hdr, INT_H_SIZE); - tipc_link_send_buf(tunnel, buf); + __tipc_link_xmit(tunnel, buf); } else { pr_warn("%sunable to send changeover msg\n", link_co_err); @@ -2039,7 +2054,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) } } -/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a +/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a * duplicate of the first link's send queue via the new link. This way, we * are guaranteed that currently queued packets from a socket are delivered * before future traffic from the same socket, even if this is using the @@ -2048,7 +2063,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) * and sequence order is preserved per sender/receiver socket pair. * Owner node is locked. */ -void tipc_link_dup_send_queue(struct tipc_link *l_ptr, +void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *tunnel) { struct sk_buff *iter; @@ -2078,7 +2093,7 @@ void tipc_link_dup_send_queue(struct tipc_link *l_ptr, skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data, length); - tipc_link_send_buf(tunnel, outbuf); + __tipc_link_xmit(tunnel, outbuf); if (!tipc_link_is_up(l_ptr)) return; iter = iter->next; @@ -2105,89 +2120,114 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } -/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent - * via other link as result of a failover (ORIGINAL_MSG) or - * a new active link (DUPLICATE_MSG). Failover packets are - * returned to the active link for delivery upwards. + + +/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. + * Owner node is locked. + */ +static void tipc_link_dup_rcv(struct tipc_link *l_ptr, + struct sk_buff *t_buf) +{ + struct sk_buff *buf; + + if (!tipc_link_is_up(l_ptr)) + return; + + buf = buf_extract(t_buf, INT_H_SIZE); + if (buf == NULL) { + pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); + return; + } + + /* Add buffer to deferred queue, if applicable: */ + link_handle_out_of_seq_msg(l_ptr, buf); +} + +/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet * Owner node is locked. */ -static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, - struct sk_buff **buf) +static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, + struct sk_buff *t_buf) { - struct sk_buff *tunnel_buf = *buf; - struct tipc_link *dest_link; + struct tipc_msg *t_msg = buf_msg(t_buf); + struct sk_buff *buf = NULL; struct tipc_msg *msg; - struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); - u32 msg_typ = msg_type(tunnel_msg); - u32 msg_count = msg_msgcnt(tunnel_msg); - u32 bearer_id = msg_bearer_id(tunnel_msg); - if (bearer_id >= MAX_BEARERS) - goto exit; - dest_link = (*l_ptr)->owner->links[bearer_id]; - if (!dest_link) - goto exit; - if (dest_link == *l_ptr) { - pr_err("Unexpected changeover message on link <%s>\n", - (*l_ptr)->name); - goto exit; - } - *l_ptr = dest_link; - msg = msg_get_wrapped(tunnel_msg); + if (tipc_link_is_up(l_ptr)) + tipc_link_reset(l_ptr); - if (msg_typ == DUPLICATE_MSG) { - if (less(msg_seqno(msg), mod(dest_link->next_in_no))) - goto exit; - *buf = buf_extract(tunnel_buf, INT_H_SIZE); - if (*buf == NULL) { - pr_warn("%sduplicate msg dropped\n", link_co_err); + /* First failover packet? */ + if (l_ptr->exp_msg_count == START_CHANGEOVER) + l_ptr->exp_msg_count = msg_msgcnt(t_msg); + + /* Should there be an inner packet? */ + if (l_ptr->exp_msg_count) { + l_ptr->exp_msg_count--; + buf = buf_extract(t_buf, INT_H_SIZE); + if (buf == NULL) { + pr_warn("%sno inner failover pkt\n", link_co_err); goto exit; } - kfree_skb(tunnel_buf); - return 1; - } + msg = buf_msg(buf); - /* First original message ?: */ - if (tipc_link_is_up(dest_link)) { - pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg, - dest_link->name); - tipc_link_reset(dest_link); - dest_link->exp_msg_count = msg_count; - if (!msg_count) - goto exit; - } else if (dest_link->exp_msg_count == START_CHANGEOVER) { - dest_link->exp_msg_count = msg_count; - if (!msg_count) + if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) { + kfree_skb(buf); + buf = NULL; goto exit; + } + if (msg_user(msg) == MSG_FRAGMENTER) { + l_ptr->stats.recv_fragments++; + tipc_link_frag_rcv(&l_ptr->reasm_head, + &l_ptr->reasm_tail, + &buf); + } } +exit: + if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) { + tipc_node_detach_link(l_ptr->owner, l_ptr); + kfree(l_ptr); + } + return buf; +} + +/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent + * via other link as result of a failover (ORIGINAL_MSG) or + * a new active link (DUPLICATE_MSG). Failover packets are + * returned to the active link for delivery upwards. + * Owner node is locked. + */ +static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, + struct sk_buff **buf) +{ + struct sk_buff *t_buf = *buf; + struct tipc_link *l_ptr; + struct tipc_msg *t_msg = buf_msg(t_buf); + u32 bearer_id = msg_bearer_id(t_msg); + + *buf = NULL; - /* Receive original message */ - if (dest_link->exp_msg_count == 0) { - pr_warn("%sgot too many tunnelled messages\n", link_co_err); + if (bearer_id >= MAX_BEARERS) goto exit; - } - dest_link->exp_msg_count--; - if (less(msg_seqno(msg), dest_link->reset_checkpoint)) { + + l_ptr = n_ptr->links[bearer_id]; + if (!l_ptr) goto exit; - } else { - *buf = buf_extract(tunnel_buf, INT_H_SIZE); - if (*buf != NULL) { - kfree_skb(tunnel_buf); - return 1; - } else { - pr_warn("%soriginal msg dropped\n", link_co_err); - } - } + + if (msg_type(t_msg) == DUPLICATE_MSG) + tipc_link_dup_rcv(l_ptr, t_buf); + else if (msg_type(t_msg) == ORIGINAL_MSG) + *buf = tipc_link_failover_rcv(l_ptr, t_buf); + else + pr_warn("%sunknown tunnel pkt received\n", link_co_err); exit: - *buf = NULL; - kfree_skb(tunnel_buf); - return 0; + kfree_skb(t_buf); + return *buf != NULL; } /* * Bundler functionality: */ -void tipc_link_recv_bundle(struct sk_buff *buf) +void tipc_link_bundle_rcv(struct sk_buff *buf) { u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; @@ -2210,11 +2250,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf) */ /* - * link_send_long_buf: Entry for buffers needing fragmentation. + * tipc_link_frag_xmit: Entry for buffers needing fragmentation. * The buffer is complete, inclusive total message length. * Returns user data length. */ -static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) +static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) { struct sk_buff *buf_chain = NULL; struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain; @@ -2277,12 +2317,11 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) return dsz; } -/* - * tipc_link_recv_fragment(): Called with node lock on. Returns +/* tipc_link_frag_rcv(): Called with node lock on. Returns * the reassembled buffer if message is complete. */ -int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff **fbuf) +int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, + struct sk_buff **fbuf) { struct sk_buff *frag = *fbuf; struct tipc_msg *msg = buf_msg(frag); @@ -2296,6 +2335,7 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, goto out_free; *head = frag; skb_frag_list_init(*head); + *fbuf = NULL; return 0; } else if (*head && skb_try_coalesce(*head, frag, &headstolen, &delta)) { @@ -2315,10 +2355,12 @@ int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail, *tail = *head = NULL; return LINK_REASM_COMPLETE; } + *fbuf = NULL; return 0; out_free: pr_warn_ratelimited("Link unable to reassemble fragmented message\n"); kfree_skb(*fbuf); + *fbuf = NULL; return LINK_REASM_ERROR; } @@ -2352,35 +2394,41 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; } -/** - * link_find_link - locate link by name - * @name: ptr to link name string - * @node: ptr to area to be filled with ptr to associated node - * +/* tipc_link_find_owner - locate owner node of link by link's name + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; * this also prevents link deletion. * - * Returns pointer to link (or 0 if invalid link name). + * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_link *link_find_link(const char *name, - struct tipc_node **node) +static struct tipc_node *tipc_link_find_owner(const char *link_name, + unsigned int *bearer_id) { struct tipc_link *l_ptr; struct tipc_node *n_ptr; + struct tipc_node *found_node = 0; int i; - list_for_each_entry(n_ptr, &tipc_node_list, list) { + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; - if (l_ptr && !strcmp(l_ptr->name, name)) - goto found; + if (l_ptr && !strcmp(l_ptr->name, link_name)) { + *bearer_id = i; + found_node = n_ptr; + break; + } } + tipc_node_unlock(n_ptr); + if (found_node) + break; } - l_ptr = NULL; - n_ptr = NULL; -found: - *node = n_ptr; - return l_ptr; + rcu_read_unlock(); + + return found_node; } /** @@ -2422,32 +2470,33 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) struct tipc_link *l_ptr; struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; + int bearer_id; int res = 0; - l_ptr = link_find_link(name, &node); - if (l_ptr) { - /* - * acquire node lock for tipc_link_send_proto_msg(). - * see "TIPC locking policy" in net.c. - */ + node = tipc_link_find_owner(name, &bearer_id); + if (node) { tipc_node_lock(node); - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - default: - res = -EINVAL; - break; + l_ptr = node->links[bearer_id]; + + if (l_ptr) { + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + link_set_supervision_props(l_ptr, new_value); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, + new_value, 0, 0); + break; + case TIPC_CMD_SET_LINK_PRI: + l_ptr->priority = new_value; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, + 0, new_value, 0); + break; + case TIPC_CMD_SET_LINK_WINDOW: + tipc_link_set_queue_limits(l_ptr, new_value); + break; + default: + res = -EINVAL; + break; + } } tipc_node_unlock(node); return res; @@ -2542,6 +2591,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ char *link_name; struct tipc_link *l_ptr; struct tipc_node *node; + unsigned int bearer_id; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -2552,15 +2602,19 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); - l_ptr = link_find_link(link_name, &node); - if (!l_ptr) { + node = tipc_link_find_owner(link_name, &bearer_id); + if (!node) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } - tipc_node_lock(node); + l_ptr = node->links[bearer_id]; + if (!l_ptr) { + tipc_node_unlock(node); + read_unlock_bh(&tipc_net_lock); + return tipc_cfg_reply_error_string("link not found"); + } link_reset_statistics(l_ptr); tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); @@ -2590,18 +2644,27 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) struct tipc_node *node; char *status; u32 profile_total = 0; + unsigned int bearer_id; int ret; if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); read_lock_bh(&tipc_net_lock); - l = link_find_link(name, &node); - if (!l) { + node = tipc_link_find_owner(name, &bearer_id); + if (!node) { read_unlock_bh(&tipc_net_lock); return 0; } tipc_node_lock(node); + + l = node->links[bearer_id]; + if (!l) { + tipc_node_unlock(node); + read_unlock_bh(&tipc_net_lock); + return 0; + } + s = &l->stats; if (tipc_link_is_active(l)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 3b6aa65b608..8c0b49b5b2e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -1,7 +1,7 @@ /* * net/tipc/link.h: Include file for TIPC link code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2013, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -40,27 +40,28 @@ #include "msg.h" #include "node.h" -/* - * Link reassembly status codes +/* Link reassembly status codes */ #define LINK_REASM_ERROR -1 #define LINK_REASM_COMPLETE 1 -/* - * Out-of-range value for link sequence numbers +/* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 -/* - * Link states +/* Link working states */ #define WORKING_WORKING 560810u #define WORKING_UNKNOWN 560811u #define RESET_UNKNOWN 560812u #define RESET_RESET 560813u -/* - * Starting value for maximum packet size negotiation on unicast links +/* Link endpoint execution states + */ +#define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 + +/* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) */ #define MAX_PKT_DEFAULT 1500 @@ -102,8 +103,7 @@ struct tipc_stats { * @media_addr: media address to use when sending messages over link * @timer: link timer * @owner: pointer to peer node - * @link_list: adjacent links in bearer's list of links - * @started: indicates if link has been started + * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint @@ -149,10 +149,9 @@ struct tipc_link { struct tipc_media_addr media_addr; struct timer_list timer; struct tipc_node *owner; - struct list_head link_list; /* Management and link supervision data */ - int started; + unsigned int flags; u32 checkpoint; u32 peer_session; u32 peer_bearer_id; @@ -215,10 +214,9 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete(struct tipc_link *l_ptr); +void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); -void tipc_link_dup_send_queue(struct tipc_link *l_ptr, - struct tipc_link *dest); +void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); @@ -231,23 +229,24 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); -int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); -void tipc_link_send_names(struct list_head *message_list, u32 dest); +void tipc_link_reset_list(unsigned int bearer_id); +int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); +void tipc_link_names_xmit(struct list_head *message_list, u32 dest); +int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_send_sections_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); -void tipc_link_recv_bundle(struct sk_buff *buf); -int tipc_link_recv_fragment(struct sk_buff **reasm_head, - struct sk_buff **reasm_tail, - struct sk_buff **fbuf); -void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority, - u32 acked_mtu); +int tipc_link_iovec_xmit_fast(struct tipc_port *sender, + struct iovec const *msg_sect, + unsigned int len, u32 destnode); +void tipc_link_bundle_rcv(struct sk_buff *buf); +int tipc_link_frag_rcv(struct sk_buff **reasm_head, + struct sk_buff **reasm_tail, + struct sk_buff **fbuf); +void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, + u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_queue(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff *buf); + struct sk_buff *buf); void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all); void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); void tipc_link_retransmit(struct tipc_link *l_ptr, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index e0d08055754..aff8041dc15 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -131,16 +131,24 @@ static void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; + struct tipc_link *l_ptr; - list_for_each_entry(n_ptr, &tipc_node_list, list) { - if (tipc_node_active_links(n_ptr)) { + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + spin_lock_bh(&n_ptr->lock); + l_ptr = n_ptr->active_links[n_ptr->addr & 1]; + if (l_ptr) { buf_copy = skb_copy(buf, GFP_ATOMIC); - if (!buf_copy) + if (!buf_copy) { + spin_unlock_bh(&n_ptr->lock); break; + } msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); - tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr); + __tipc_link_xmit(l_ptr, buf_copy); } + spin_unlock_bh(&n_ptr->lock); } + rcu_read_unlock(); kfree_skb(buf); } @@ -262,7 +270,7 @@ void tipc_named_node_up(unsigned long nodearg) named_distribute(&message_list, node, &publ_zone, max_item_buf); read_unlock_bh(&tipc_nametbl_lock); - tipc_link_send_names(&message_list, node); + tipc_link_names_xmit(&message_list, node); } /** @@ -293,9 +301,9 @@ static void named_purge_publ(struct publication *publ) } /** - * tipc_named_recv - process name table update message sent by another node + * tipc_named_rcv - process name table update message sent by another node */ -void tipc_named_recv(struct sk_buff *buf) +void tipc_named_rcv(struct sk_buff *buf) { struct publication *publ; struct tipc_msg *msg = buf_msg(buf); diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1e41bdd4f25..9b312ccfd43 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -42,7 +42,7 @@ void tipc_named_publish(struct publication *publ); void tipc_named_withdraw(struct publication *publ); void tipc_named_node_up(unsigned long node); -void tipc_named_recv(struct sk_buff *buf); +void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 92a1533af4e..042e8e3cabc 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -941,20 +941,48 @@ int tipc_nametbl_init(void) return 0; } -void tipc_nametbl_stop(void) +/** + * tipc_purge_publications - remove all publications for a given type + * + * tipc_nametbl_lock must be held when calling this function + */ +static void tipc_purge_publications(struct name_seq *seq) { - u32 i; + struct publication *publ, *safe; + struct sub_seq *sseq; + struct name_info *info; - if (!table.types) + if (!seq->sseqs) { + nameseq_delete_empty(seq); return; + } + sseq = seq->sseqs; + info = sseq->info; + list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, + publ->ref, publ->key); + } +} + +void tipc_nametbl_stop(void) +{ + u32 i; + struct name_seq *seq; + struct hlist_head *seq_head; + struct hlist_node *safe; - /* Verify name table is empty, then release it */ + /* Verify name table is empty and purge any lingering + * publications, then release the name table + */ write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; - pr_err("nametbl_stop(): orphaned hash chain detected\n"); - break; + seq_head = &table.types[i]; + hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { + tipc_purge_publications(seq); + } + continue; } kfree(table.types); table.types = NULL; diff --git a/net/tipc/net.c b/net/tipc/net.c index 7d305ecc09c..4c564eb69e1 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -146,19 +146,19 @@ void tipc_net_route_msg(struct sk_buff *buf) if (tipc_in_scope(dnode, tipc_own_addr)) { if (msg_isdata(msg)) { if (msg_mcast(msg)) - tipc_port_recv_mcast(buf, NULL); + tipc_port_mcast_rcv(buf, NULL); else if (msg_destport(msg)) - tipc_port_recv_msg(buf); + tipc_port_rcv(buf); else net_route_named_msg(buf); return; } switch (msg_user(msg)) { case NAME_DISTRIBUTOR: - tipc_named_recv(buf); + tipc_named_rcv(buf); break; case CONN_MANAGER: - tipc_port_recv_proto_msg(buf); + tipc_port_proto_rcv(buf); break; default: kfree_skb(buf); @@ -168,7 +168,7 @@ void tipc_net_route_msg(struct sk_buff *buf) /* Handle message for another node */ skb_trim(buf, msg_size(msg)); - tipc_link_send(buf, dnode, msg_link_selector(msg)); + tipc_link_xmit(buf, dnode, msg_link_selector(msg)); } void tipc_net_start(u32 addr) @@ -182,8 +182,8 @@ void tipc_net_start(u32 addr) tipc_bclink_init(); write_unlock_bh(&tipc_net_lock); - tipc_cfg_reinit(); - + tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, + TIPC_ZONE_SCOPE, 0, tipc_own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); @@ -191,15 +191,15 @@ void tipc_net_start(u32 addr) void tipc_net_stop(void) { - struct tipc_node *node, *t_node; - if (!tipc_own_addr) return; + + tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_bclink_stop(); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); + tipc_node_stop(); write_unlock_bh(&tipc_net_lock); + pr_info("Left network mode\n"); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 9f72a637636..3aaf73de9e2 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -83,8 +83,6 @@ static struct genl_ops tipc_genl_ops[] = { }, }; -static int tipc_genl_family_registered; - int tipc_netlink_start(void) { int res; @@ -94,16 +92,10 @@ int tipc_netlink_start(void) pr_err("Failed to register netlink interface\n"); return res; } - - tipc_genl_family_registered = 1; return 0; } void tipc_netlink_stop(void) { - if (!tipc_genl_family_registered) - return; - genl_unregister_family(&tipc_genl_family); - tipc_genl_family_registered = 0; } diff --git a/net/tipc/node.c b/net/tipc/node.c index efe4d41bf11..1d3a4999a70 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2,7 +2,7 @@ * net/tipc/node.c: TIPC node management routines * * Copyright (c) 2000-2006, 2012 Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,13 +44,11 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -static DEFINE_SPINLOCK(node_create_lock); - static struct hlist_head node_htable[NODE_HTABLE_SIZE]; LIST_HEAD(tipc_node_list); static u32 tipc_num_nodes; - -static atomic_t tipc_num_links = ATOMIC_INIT(0); +static u32 tipc_num_links; +static DEFINE_SPINLOCK(node_list_lock); /* * A trivial power-of-two bitmask technique is used for speed, since this @@ -73,37 +71,26 @@ struct tipc_node *tipc_node_find(u32 addr) if (unlikely(!in_own_cluster_exact(addr))) return NULL; - hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { - if (node->addr == addr) + rcu_read_lock(); + hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) { + if (node->addr == addr) { + rcu_read_unlock(); return node; + } } + rcu_read_unlock(); return NULL; } -/** - * tipc_node_create - create neighboring node - * - * Currently, this routine is called by neighbor discovery code, which holds - * net_lock for reading only. We must take node_create_lock to ensure a node - * isn't created twice if two different bearers discover the node at the same - * time. (It would be preferable to switch to holding net_lock in write mode, - * but this is a non-trivial change.) - */ struct tipc_node *tipc_node_create(u32 addr) { struct tipc_node *n_ptr, *temp_node; - spin_lock_bh(&node_create_lock); - - n_ptr = tipc_node_find(addr); - if (n_ptr) { - spin_unlock_bh(&node_create_lock); - return n_ptr; - } + spin_lock_bh(&node_list_lock); n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&node_create_lock); + spin_unlock_bh(&node_list_lock); pr_warn("Node creation failed, no memory\n"); return NULL; } @@ -114,31 +101,41 @@ struct tipc_node *tipc_node_create(u32 addr) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->nsub); - hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); - list_for_each_entry(temp_node, &tipc_node_list, list) { + list_for_each_entry_rcu(temp_node, &tipc_node_list, list) { if (n_ptr->addr < temp_node->addr) break; } - list_add_tail(&n_ptr->list, &temp_node->list); + list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->block_setup = WAIT_PEER_DOWN; n_ptr->signature = INVALID_NODE_SIG; tipc_num_nodes++; - spin_unlock_bh(&node_create_lock); + spin_unlock_bh(&node_list_lock); return n_ptr; } -void tipc_node_delete(struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *n_ptr) { - list_del(&n_ptr->list); - hlist_del(&n_ptr->hash); - kfree(n_ptr); + list_del_rcu(&n_ptr->list); + hlist_del_rcu(&n_ptr->hash); + kfree_rcu(n_ptr, rcu); tipc_num_nodes--; } +void tipc_node_stop(void) +{ + struct tipc_node *node, *t_node; + + spin_lock_bh(&node_list_lock); + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) + tipc_node_delete(node); + spin_unlock_bh(&node_list_lock); +} + /** * tipc_node_link_up - handle addition of link * @@ -162,7 +159,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) pr_info("New link <%s> becomes standby\n", l_ptr->name); return; } - tipc_link_dup_send_queue(active[0], l_ptr); + tipc_link_dup_queue_xmit(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; return; @@ -243,15 +240,25 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; - atomic_inc(&tipc_num_links); + spin_lock_bh(&node_list_lock); + tipc_num_links++; + spin_unlock_bh(&node_list_lock); n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = NULL; - atomic_dec(&tipc_num_links); - n_ptr->link_cnt--; + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + if (l_ptr != n_ptr->links[i]) + continue; + n_ptr->links[i] = NULL; + spin_lock_bh(&node_list_lock); + tipc_num_links--; + spin_unlock_bh(&node_list_lock); + n_ptr->link_cnt--; + } } static void node_established_contact(struct tipc_node *n_ptr) @@ -335,27 +342,28 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - read_lock_bh(&tipc_net_lock); + spin_lock_bh(&node_list_lock); if (!tipc_num_nodes) { - read_unlock_bh(&tipc_net_lock); + spin_unlock_bh(&node_list_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; if (payload_size > 32768u) { - read_unlock_bh(&tipc_net_lock); + spin_unlock_bh(&node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many nodes)"); } + spin_unlock_bh(&node_list_lock); + buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) { - read_unlock_bh(&tipc_net_lock); + if (!buf) return NULL; - } /* Add TLVs for all nodes in scope */ - list_for_each_entry(n_ptr, &tipc_node_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); @@ -363,8 +371,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, &node_info, sizeof(node_info)); } - - read_unlock_bh(&tipc_net_lock); + rcu_read_unlock(); return buf; } @@ -387,21 +394,19 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!tipc_own_addr) return tipc_cfg_reply_none(); - read_lock_bh(&tipc_net_lock); - + spin_lock_bh(&node_list_lock); /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * - (atomic_read(&tipc_num_links) + 1); + payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1)); if (payload_size > 32768u) { - read_unlock_bh(&tipc_net_lock); + spin_unlock_bh(&node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many links)"); } + spin_unlock_bh(&node_list_lock); + buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) { - read_unlock_bh(&tipc_net_lock); + if (!buf) return NULL; - } /* Add TLV for broadcast link */ link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); @@ -410,7 +415,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); /* Add TLVs for any other links in scope */ - list_for_each_entry(n_ptr, &tipc_node_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { u32 i; if (!tipc_in_scope(domain, n_ptr->addr)) @@ -427,7 +433,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) } tipc_node_unlock(n_ptr); } - - read_unlock_bh(&tipc_net_lock); + rcu_read_unlock(); return buf; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 63e2e8ead2f..7cbb8cec1a9 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -2,7 +2,7 @@ * net/tipc/node.h: Include file for TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ * @link_cnt: number of links to node * @signature: node instance identifier * @bclink: broadcast-related info + * @rcu: rcu struct for tipc_node * @acked: sequence # of last outbound b'cast message acknowledged by node * @last_in: sequence # of last in-sequence b'cast message received from node * @last_sent: sequence # of last b'cast message sent by node @@ -89,6 +90,7 @@ struct tipc_node { int working_links; int block_setup; u32 signature; + struct rcu_head rcu; struct { u32 acked; u32 last_in; @@ -107,7 +109,7 @@ extern struct list_head tipc_node_list; struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_delete(struct tipc_node *n_ptr); +void tipc_node_stop(void); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); diff --git a/net/tipc/port.c b/net/tipc/port.c index b742b265452..5c14c7801ee 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -1,7 +1,7 @@ /* * net/tipc/port.c: TIPC port code * - * Copyright (c) 1992-2007, Ericsson AB + * Copyright (c) 1992-2007, 2014, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -38,6 +38,7 @@ #include "config.h" #include "port.h" #include "name_table.h" +#include "socket.h" /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ @@ -54,17 +55,6 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err); static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err); static void port_timeout(unsigned long ref); - -static u32 port_peernode(struct tipc_port *p_ptr) -{ - return msg_destnode(&p_ptr->phdr); -} - -static u32 port_peerport(struct tipc_port *p_ptr) -{ - return msg_destport(&p_ptr->phdr); -} - /** * tipc_port_peer_msg - verify message was sent by connected port's peer * @@ -76,33 +66,32 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) u32 peernode; u32 orignode; - if (msg_origport(msg) != port_peerport(p_ptr)) + if (msg_origport(msg) != tipc_port_peerport(p_ptr)) return 0; orignode = msg_orignode(msg); - peernode = port_peernode(p_ptr); + peernode = tipc_port_peernode(p_ptr); return (orignode == peernode) || (!orignode && (peernode == tipc_own_addr)) || (!peernode && (orignode == tipc_own_addr)); } /** - * tipc_multicast - send a multicast message to local and remote destinations + * tipc_port_mcast_xmit - send a multicast message to local and remote + * destinations */ -int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, - struct iovec const *msg_sect, unsigned int len) +int tipc_port_mcast_xmit(struct tipc_port *oport, + struct tipc_name_seq const *seq, + struct iovec const *msg_sect, + unsigned int len) { struct tipc_msg *hdr; struct sk_buff *buf; struct sk_buff *ibuf = NULL; struct tipc_port_list dports = {0, NULL, }; - struct tipc_port *oport = tipc_port_deref(ref); int ext_targets; int res; - if (unlikely(!oport)) - return -EINVAL; - /* Create multicast message */ hdr = &oport->phdr; msg_set_type(hdr, TIPC_MCAST_MSG); @@ -131,7 +120,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, return -ENOMEM; } } - res = tipc_bclink_send_msg(buf); + res = tipc_bclink_xmit(buf); if ((res < 0) && (dports.count != 0)) kfree_skb(ibuf); } else { @@ -140,7 +129,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, if (res >= 0) { if (ibuf) - tipc_port_recv_mcast(ibuf, &dports); + tipc_port_mcast_rcv(ibuf, &dports); } else { tipc_port_list_free(&dports); } @@ -148,11 +137,11 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, } /** - * tipc_port_recv_mcast - deliver multicast message to all destination ports + * tipc_port_mcast_rcv - deliver multicast message to all destination ports * * If there is no port list, perform a lookup to create one */ -void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp) +void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) { struct tipc_msg *msg; struct tipc_port_list dports = {0, NULL, }; @@ -176,7 +165,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp) msg_set_destnode(msg, tipc_own_addr); if (dp->count == 1) { msg_set_destport(msg, dp->ports[0]); - tipc_port_recv_msg(buf); + tipc_port_rcv(buf); tipc_port_list_free(dp); return; } @@ -191,7 +180,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp) if ((index == 0) && (cnt != 0)) item = item->next; msg_set_destport(buf_msg(b), item->ports[index]); - tipc_port_recv_msg(b); + tipc_port_rcv(b); } } exit: @@ -199,40 +188,32 @@ exit: tipc_port_list_free(dp); } -/** - * tipc_createport - create a generic TIPC port + +void tipc_port_wakeup(struct tipc_port *port) +{ + tipc_sock_wakeup(tipc_port_to_sock(port)); +} + +/* tipc_port_init - intiate TIPC port and lock it * - * Returns pointer to (locked) TIPC port, or NULL if unable to create it + * Returns obtained reference if initialization is successful, zero otherwise */ -struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, - struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) +u32 tipc_port_init(struct tipc_port *p_ptr, + const unsigned int importance) { - struct tipc_port *p_ptr; struct tipc_msg *msg; u32 ref; - p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC); - if (!p_ptr) { - pr_warn("Port creation failed, no memory\n"); - return NULL; - } ref = tipc_ref_acquire(p_ptr, &p_ptr->lock); if (!ref) { - pr_warn("Port creation failed, ref. table exhausted\n"); - kfree(p_ptr); - return NULL; + pr_warn("Port registration failed, ref. table exhausted\n"); + return 0; } - p_ptr->sk = sk; p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); - p_ptr->dispatcher = dispatcher; - p_ptr->wakeup = wakeup; k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); INIT_LIST_HEAD(&p_ptr->publications); INIT_LIST_HEAD(&p_ptr->port_list); @@ -248,10 +229,10 @@ struct tipc_port *tipc_createport(struct sock *sk, msg_set_origport(msg, ref); list_add_tail(&p_ptr->port_list, &ports); spin_unlock_bh(&tipc_port_list_lock); - return p_ptr; + return ref; } -int tipc_deleteport(struct tipc_port *p_ptr) +void tipc_port_destroy(struct tipc_port *p_ptr) { struct sk_buff *buf = NULL; @@ -272,67 +253,7 @@ int tipc_deleteport(struct tipc_port *p_ptr) list_del(&p_ptr->wait_list); spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); - kfree(p_ptr); tipc_net_route_msg(buf); - return 0; -} - -static int port_unreliable(struct tipc_port *p_ptr) -{ - return msg_src_droppable(&p_ptr->phdr); -} - -int tipc_portunreliable(u32 ref, unsigned int *isunreliable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *isunreliable = port_unreliable(p_ptr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0)); - tipc_port_unlock(p_ptr); - return 0; -} - -static int port_unreturnable(struct tipc_port *p_ptr) -{ - return msg_dest_droppable(&p_ptr->phdr); -} - -int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *isunrejectable = port_unreturnable(p_ptr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0)); - tipc_port_unlock(p_ptr); - return 0; } /* @@ -350,8 +271,8 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, if (buf) { msg = buf_msg(buf); tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE, - port_peernode(p_ptr)); - msg_set_destport(msg, port_peerport(p_ptr)); + tipc_port_peernode(p_ptr)); + msg_set_destport(msg, tipc_port_peerport(p_ptr)); msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); } @@ -422,17 +343,17 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) /* send returned message & dispose of rejected message */ src_node = msg_prevnode(msg); if (in_own_node(src_node)) - tipc_port_recv_msg(rbuf); + tipc_port_rcv(rbuf); else - tipc_link_send(rbuf, src_node, msg_link_selector(rmsg)); + tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); exit: kfree_skb(buf); return data_sz; } -int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err) +int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr, + struct iovec const *msg_sect, unsigned int len, + int err) { struct sk_buff *buf; int res; @@ -519,7 +440,7 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er return buf; } -void tipc_port_recv_proto_msg(struct sk_buff *buf) +void tipc_port_proto_rcv(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port *p_ptr; @@ -547,13 +468,12 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) /* Process protocol message sent by peer */ switch (msg_type(msg)) { case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested && - p_ptr->wakeup; + wakeable = tipc_port_congested(p_ptr) && p_ptr->congested; p_ptr->acked += msg_msgcnt(msg); if (!tipc_port_congested(p_ptr)) { p_ptr->congested = 0; if (wakeable) - p_ptr->wakeup(p_ptr); + tipc_port_wakeup(p_ptr); } break; case CONN_PROBE: @@ -584,8 +504,8 @@ static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref); if (p_ptr->connected) { - u32 dport = port_peerport(p_ptr); - u32 destnode = port_peernode(p_ptr); + u32 dport = tipc_port_peerport(p_ptr); + u32 destnode = tipc_port_peernode(p_ptr); ret += tipc_snprintf(buf + ret, len - ret, " connected to <%u.%u.%u:%u>", @@ -673,34 +593,6 @@ void tipc_acknowledge(u32 ref, u32 ack) tipc_net_route_msg(buf); } -int tipc_portimportance(u32 ref, unsigned int *importance) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *importance = (unsigned int)msg_importance(&p_ptr->phdr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portimportance(u32 ref, unsigned int imp) -{ - struct tipc_port *p_ptr; - - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_importance(&p_ptr->phdr, (u32)imp); - tipc_port_unlock(p_ptr); - return 0; -} - - int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *seq) { @@ -760,7 +652,7 @@ int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, return res; } -int tipc_connect(u32 ref, struct tipc_portid const *peer) +int tipc_port_connect(u32 ref, struct tipc_portid const *peer) { struct tipc_port *p_ptr; int res; @@ -768,17 +660,17 @@ int tipc_connect(u32 ref, struct tipc_portid const *peer) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - res = __tipc_connect(ref, p_ptr, peer); + res = __tipc_port_connect(ref, p_ptr, peer); tipc_port_unlock(p_ptr); return res; } /* - * __tipc_connect - connect to a remote peer + * __tipc_port_connect - connect to a remote peer * * Port must be locked. */ -int __tipc_connect(u32 ref, struct tipc_port *p_ptr, +int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, struct tipc_portid const *peer) { struct tipc_msg *msg; @@ -815,7 +707,7 @@ exit: * * Port must be locked. */ -int __tipc_disconnect(struct tipc_port *tp_ptr) +int __tipc_port_disconnect(struct tipc_port *tp_ptr) { if (tp_ptr->connected) { tp_ptr->connected = 0; @@ -828,10 +720,10 @@ int __tipc_disconnect(struct tipc_port *tp_ptr) } /* - * tipc_disconnect(): Disconnect port form peer. + * tipc_port_disconnect(): Disconnect port form peer. * This is a node local operation. */ -int tipc_disconnect(u32 ref) +int tipc_port_disconnect(u32 ref) { struct tipc_port *p_ptr; int res; @@ -839,15 +731,15 @@ int tipc_disconnect(u32 ref) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - res = __tipc_disconnect(p_ptr); + res = __tipc_port_disconnect(p_ptr); tipc_port_unlock(p_ptr); return res; } /* - * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect + * tipc_port_shutdown(): Send a SHUTDOWN msg to peer and disconnect */ -int tipc_shutdown(u32 ref) +int tipc_port_shutdown(u32 ref) { struct tipc_port *p_ptr; struct sk_buff *buf = NULL; @@ -859,13 +751,13 @@ int tipc_shutdown(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); tipc_net_route_msg(buf); - return tipc_disconnect(ref); + return tipc_port_disconnect(ref); } /** - * tipc_port_recv_msg - receive message from lower layer and deliver to port user + * tipc_port_rcv - receive message from lower layer and deliver to port user */ -int tipc_port_recv_msg(struct sk_buff *buf) +int tipc_port_rcv(struct sk_buff *buf) { struct tipc_port *p_ptr; struct tipc_msg *msg = buf_msg(buf); @@ -882,7 +774,7 @@ int tipc_port_recv_msg(struct sk_buff *buf) /* validate destination & pass to port, otherwise reject message */ p_ptr = tipc_port_lock(destport); if (likely(p_ptr)) { - err = p_ptr->dispatcher(p_ptr, buf); + err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf); tipc_port_unlock(p_ptr); if (likely(!err)) return dsz; @@ -894,43 +786,43 @@ int tipc_port_recv_msg(struct sk_buff *buf) } /* - * tipc_port_recv_sections(): Concatenate and deliver sectioned - * message for this node. + * tipc_port_iovec_rcv: Concatenate and deliver sectioned + * message for this node. */ -static int tipc_port_recv_sections(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len) +static int tipc_port_iovec_rcv(struct tipc_port *sender, + struct iovec const *msg_sect, + unsigned int len) { struct sk_buff *buf; int res; res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); if (likely(buf)) - tipc_port_recv_msg(buf); + tipc_port_rcv(buf); return res; } /** * tipc_send - send message sections on connection */ -int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len) +int tipc_send(struct tipc_port *p_ptr, + struct iovec const *msg_sect, + unsigned int len) { - struct tipc_port *p_ptr; u32 destnode; int res; - p_ptr = tipc_port_deref(ref); - if (!p_ptr || !p_ptr->connected) + if (!p_ptr->connected) return -EINVAL; p_ptr->congested = 1; if (!tipc_port_congested(p_ptr)) { - destnode = port_peernode(p_ptr); + destnode = tipc_port_peernode(p_ptr); if (likely(!in_own_node(destnode))) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, - len, destnode); + res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, + destnode); else - res = tipc_port_recv_sections(p_ptr, msg_sect, len); + res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); if (likely(res != -ELINKCONG)) { p_ptr->congested = 0; @@ -939,7 +831,7 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len) return res; } } - if (port_unreliable(p_ptr)) { + if (tipc_port_unreliable(p_ptr)) { p_ptr->congested = 0; return len; } @@ -949,17 +841,18 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len) /** * tipc_send2name - send message sections to port name */ -int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, - struct iovec const *msg_sect, unsigned int len) +int tipc_send2name(struct tipc_port *p_ptr, + struct tipc_name const *name, + unsigned int domain, + struct iovec const *msg_sect, + unsigned int len) { - struct tipc_port *p_ptr; struct tipc_msg *msg; u32 destnode = domain; u32 destport; int res; - p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->connected) + if (p_ptr->connected) return -EINVAL; msg = &p_ptr->phdr; @@ -974,39 +867,39 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, if (likely(destport || destnode)) { if (likely(in_own_node(destnode))) - res = tipc_port_recv_sections(p_ptr, msg_sect, len); + res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); else if (tipc_own_addr) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, - len, destnode); + res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, + destnode); else - res = tipc_port_reject_sections(p_ptr, msg, msg_sect, - len, TIPC_ERR_NO_NODE); + res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, + len, TIPC_ERR_NO_NODE); if (likely(res != -ELINKCONG)) { if (res > 0) p_ptr->sent++; return res; } - if (port_unreliable(p_ptr)) { + if (tipc_port_unreliable(p_ptr)) return len; - } + return -ELINKCONG; } - return tipc_port_reject_sections(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NAME); + return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, + TIPC_ERR_NO_NAME); } /** * tipc_send2port - send message sections to port identity */ -int tipc_send2port(u32 ref, struct tipc_portid const *dest, - struct iovec const *msg_sect, unsigned int len) +int tipc_send2port(struct tipc_port *p_ptr, + struct tipc_portid const *dest, + struct iovec const *msg_sect, + unsigned int len) { - struct tipc_port *p_ptr; struct tipc_msg *msg; int res; - p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->connected) + if (p_ptr->connected) return -EINVAL; msg = &p_ptr->phdr; @@ -1017,20 +910,20 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, msg_set_hdr_sz(msg, BASIC_H_SIZE); if (in_own_node(dest->node)) - res = tipc_port_recv_sections(p_ptr, msg_sect, len); + res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); else if (tipc_own_addr) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, len, - dest->node); + res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, + dest->node); else - res = tipc_port_reject_sections(p_ptr, msg, msg_sect, len, + res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, TIPC_ERR_NO_NODE); if (likely(res != -ELINKCONG)) { if (res > 0) p_ptr->sent++; return res; } - if (port_unreliable(p_ptr)) { + if (tipc_port_unreliable(p_ptr)) return len; - } + return -ELINKCONG; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 34f12bd4074..a00397393bd 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -1,7 +1,7 @@ /* * net/tipc/port.h: Include file for TIPC port code * - * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 1994-2007, 2014, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -48,7 +48,6 @@ /** * struct tipc_port - TIPC port structure - * @sk: pointer to socket handle * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established @@ -60,8 +59,6 @@ * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports - * @dispatcher: ptr to routine which handles received messages - * @wakeup: ptr to routine to call when port is no longer congested * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: * @sent: # of non-empty messages sent by port @@ -74,7 +71,6 @@ * @subscription: "node down" subscription used to terminate failed connections */ struct tipc_port { - struct sock *sk; spinlock_t *lock; int connected; u32 conn_type; @@ -86,8 +82,6 @@ struct tipc_port { u32 ref; struct tipc_msg phdr; struct list_head port_list; - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); - void (*wakeup)(struct tipc_port *); struct list_head wait_list; u32 waiting_pkts; u32 sent; @@ -106,68 +100,71 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, - struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance); +u32 tipc_port_init(struct tipc_port *p_ptr, + const unsigned int importance); int tipc_reject_msg(struct sk_buff *buf, u32 err); void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_deleteport(struct tipc_port *p_ptr); - -int tipc_portimportance(u32 portref, unsigned int *importance); -int tipc_set_portimportance(u32 portref, unsigned int importance); - -int tipc_portunreliable(u32 portref, unsigned int *isunreliable); -int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); - -int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); -int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); +void tipc_port_destroy(struct tipc_port *p_ptr); int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); + int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); -int tipc_connect(u32 portref, struct tipc_portid const *port); +int tipc_port_connect(u32 portref, struct tipc_portid const *port); -int tipc_disconnect(u32 portref); +int tipc_port_disconnect(u32 portref); -int tipc_shutdown(u32 ref); +int tipc_port_shutdown(u32 ref); +void tipc_port_wakeup(struct tipc_port *port); /* * The following routines require that the port be locked on entry */ -int __tipc_disconnect(struct tipc_port *tp_ptr); -int __tipc_connect(u32 ref, struct tipc_port *p_ptr, +int __tipc_port_disconnect(struct tipc_port *tp_ptr); +int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, struct tipc_portid const *peer); int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); /* * TIPC messaging routines */ -int tipc_port_recv_msg(struct sk_buff *buf); -int tipc_send(u32 portref, struct iovec const *msg_sect, unsigned int len); - -int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, - struct iovec const *msg_sect, unsigned int len); +int tipc_port_rcv(struct sk_buff *buf); + +int tipc_send(struct tipc_port *port, + struct iovec const *msg_sect, + unsigned int len); + +int tipc_send2name(struct tipc_port *port, + struct tipc_name const *name, + u32 domain, + struct iovec const *msg_sect, + unsigned int len); + +int tipc_send2port(struct tipc_port *port, + struct tipc_portid const *dest, + struct iovec const *msg_sect, + unsigned int len); + +int tipc_port_mcast_xmit(struct tipc_port *port, + struct tipc_name_seq const *seq, + struct iovec const *msg, + unsigned int len); + +int tipc_port_iovec_reject(struct tipc_port *p_ptr, + struct tipc_msg *hdr, + struct iovec const *msg_sect, + unsigned int len, + int err); -int tipc_send2port(u32 portref, struct tipc_portid const *dest, - struct iovec const *msg_sect, unsigned int len); - -int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, - struct iovec const *msg, unsigned int len); - -int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err); struct sk_buff *tipc_port_get_ports(void); -void tipc_port_recv_proto_msg(struct sk_buff *buf); -void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp); +void tipc_port_proto_rcv(struct sk_buff *buf); +void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); /** @@ -188,14 +185,53 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) spin_unlock_bh(p_ptr->lock); } -static inline struct tipc_port *tipc_port_deref(u32 ref) +static inline int tipc_port_congested(struct tipc_port *p_ptr) { - return (struct tipc_port *)tipc_ref_deref(ref); + return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); } -static inline int tipc_port_congested(struct tipc_port *p_ptr) + +static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) { - return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); + return msg_destnode(&p_ptr->phdr); +} + +static inline u32 tipc_port_peerport(struct tipc_port *p_ptr) +{ + return msg_destport(&p_ptr->phdr); +} + +static inline bool tipc_port_unreliable(struct tipc_port *port) +{ + return msg_src_droppable(&port->phdr) != 0; +} + +static inline void tipc_port_set_unreliable(struct tipc_port *port, + bool unreliable) +{ + msg_set_src_droppable(&port->phdr, unreliable ? 1 : 0); +} + +static inline bool tipc_port_unreturnable(struct tipc_port *port) +{ + return msg_dest_droppable(&port->phdr) != 0; +} + +static inline void tipc_port_set_unreturnable(struct tipc_port *port, + bool unreturnable) +{ + msg_set_dest_droppable(&port->phdr, unreturnable ? 1 : 0); +} + + +static inline int tipc_port_importance(struct tipc_port *port) +{ + return msg_importance(&port->phdr); +} + +static inline void tipc_port_set_importance(struct tipc_port *port, int imp) +{ + msg_set_importance(&port->phdr, (u32)imp); } #endif diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 2a2a938dc22..3d4ecd754ee 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -89,7 +89,7 @@ struct ref_table { static struct ref_table tipc_ref_table; -static DEFINE_RWLOCK(ref_table_lock); +static DEFINE_SPINLOCK(ref_table_lock); /** * tipc_ref_table_init - create reference table for objects @@ -126,9 +126,6 @@ int tipc_ref_table_init(u32 requested_size, u32 start) */ void tipc_ref_table_stop(void) { - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); tipc_ref_table.entries = NULL; } @@ -162,7 +159,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) } /* take a free entry, if available; otherwise initialize a new entry */ - write_lock_bh(&ref_table_lock); + spin_lock_bh(&ref_table_lock); if (tipc_ref_table.first_free) { index = tipc_ref_table.first_free; entry = &(tipc_ref_table.entries[index]); @@ -178,7 +175,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) } else { ref = 0; } - write_unlock_bh(&ref_table_lock); + spin_unlock_bh(&ref_table_lock); /* * Grab the lock so no one else can modify this entry @@ -219,7 +216,7 @@ void tipc_ref_discard(u32 ref) index = ref & index_mask; entry = &(tipc_ref_table.entries[index]); - write_lock_bh(&ref_table_lock); + spin_lock_bh(&ref_table_lock); if (!entry->object) { pr_err("Attempt to discard ref. to non-existent obj\n"); @@ -245,7 +242,7 @@ void tipc_ref_discard(u32 ref) tipc_ref_table.last_free = index; exit: - write_unlock_bh(&ref_table_lock); + spin_unlock_bh(&ref_table_lock); } /** @@ -267,20 +264,3 @@ void *tipc_ref_lock(u32 ref) } return NULL; } - - -/** - * tipc_ref_deref - return pointer referenced object (without locking it) - */ -void *tipc_ref_deref(u32 ref) -{ - if (likely(tipc_ref_table.entries)) { - struct reference *entry; - - entry = &tipc_ref_table.entries[ref & - tipc_ref_table.index_mask]; - if (likely(entry->ref == ref)) - return entry->object; - } - return NULL; -} diff --git a/net/tipc/ref.h b/net/tipc/ref.h index 5bc8e7ab84d..d01aa1df63b 100644 --- a/net/tipc/ref.h +++ b/net/tipc/ref.h @@ -44,6 +44,5 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock); void tipc_ref_discard(u32 ref); void *tipc_ref_lock(u32 ref); -void *tipc_ref_deref(u32 ref); #endif diff --git a/net/tipc/server.c b/net/tipc/server.c index b635ca347a8..a538a02f869 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; if (con->sock) { tipc_sock_release_local(con->sock); @@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref) } tipc_clean_outqueues(con); - - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); - kfree(con); } @@ -124,7 +119,7 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) return con; } -static void sock_data_ready(struct sock *sk, int unused) +static void sock_data_ready(struct sock *sk) { struct tipc_conn *con; @@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; @@ -299,7 +297,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); /* Wake up receive process in case of 'SYN+' message */ - newsock->sk->sk_data_ready(newsock->sk, 0); + newsock->sk->sk_data_ready(newsock->sk); return ret; } @@ -429,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) + if (test_bit(CF_CONNECTED, &con->flags)) { if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - + } else { + conn_put(con); + } return 0; } @@ -573,7 +573,6 @@ int tipc_server_start(struct tipc_server *s) kmem_cache_destroy(s->rcvbuf_cache); return ret; } - s->enabled = 1; return ret; } @@ -583,10 +582,6 @@ void tipc_server_stop(struct tipc_server *s) int total = 0; int id; - if (!s->enabled) - return; - - s->enabled = 0; spin_lock_bh(&s->idr_lock); for (id = 0; total < s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); diff --git a/net/tipc/server.h b/net/tipc/server.h index 98b23f20bc0..be817b0b547 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -56,7 +56,6 @@ * @name: server name * @imp: message importance * @type: socket type - * @enabled: identify whether server is launched or not */ struct tipc_server { struct idr conn_idr; @@ -74,7 +73,6 @@ struct tipc_server { const char name[TIPC_SERVER_NAME_LEN]; int imp; int type; - int enabled; }; int tipc_conn_sendmsg(struct tipc_server *s, int conid, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index aab4948f0af..3c0256962f7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012 Ericsson AB + * Copyright (c) 2001-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -38,30 +38,17 @@ #include "port.h" #include <linux/export.h> -#include <net/sock.h> #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -struct tipc_sock { - struct sock sk; - struct tipc_port *p; - struct tipc_portid peer_name; - unsigned int conn_timeout; -}; - -#define tipc_sk(sk) ((struct tipc_sock *)(sk)) -#define tipc_sk_port(sk) (tipc_sk(sk)->p) - static int backlog_rcv(struct sock *sk, struct sk_buff *skb); -static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); -static void wakeupdispatch(struct tipc_port *tport); -static void tipc_data_ready(struct sock *sk, int len); +static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); -static int release(struct socket *sock); -static int accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_release(struct socket *sock); +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -70,8 +57,6 @@ static const struct proto_ops msg_ops; static struct proto tipc_proto; static struct proto tipc_proto_kern; -static int sockets_enabled; - /* * Revised TIPC socket locking policy: * @@ -117,6 +102,8 @@ static int sockets_enabled; * - port reference */ +#include "socket.h" + /** * advance_rx_queue - discard first buffer in socket receive queue * @@ -152,13 +139,15 @@ static void reject_rx_queue(struct sock *sk) * * Returns 0 on success, errno otherwise */ -static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, + int protocol, int kern) { const struct proto_ops *ops; socket_state state; struct sock *sk; - struct tipc_port *tp_ptr; + struct tipc_sock *tsk; + struct tipc_port *port; + u32 ref; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -191,10 +180,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, if (sk == NULL) return -ENOMEM; - /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); - if (unlikely(!tp_ptr)) { + tsk = tipc_sk(sk); + port = &tsk->port; + + ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE); + if (!ref) { + pr_warn("Socket registration failed, ref. table exhausted\n"); sk_free(sk); return -ENOMEM; } @@ -208,17 +199,14 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; - tipc_sk(sk)->p = tp_ptr; tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; - - spin_unlock_bh(tp_ptr->lock); + tipc_port_unlock(port); if (sock->state == SS_READY) { - tipc_set_portunreturnable(tp_ptr->ref, 1); + tipc_port_set_unreturnable(port, true); if (sock->type == SOCK_DGRAM) - tipc_set_portunreliable(tp_ptr->ref, 1); + tipc_port_set_unreliable(port, true); } - return 0; } @@ -256,7 +244,7 @@ int tipc_sock_create_local(int type, struct socket **res) */ void tipc_sock_release_local(struct socket *sock) { - release(sock); + tipc_release(sock); sock->ops = NULL; sock_release(sock); } @@ -282,7 +270,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, if (ret < 0) return ret; - ret = accept(sock, *newsock, flags); + ret = tipc_accept(sock, *newsock, flags); if (ret < 0) { sock_release(*newsock); return ret; @@ -292,7 +280,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, } /** - * release - destroy a TIPC socket + * tipc_release - destroy a TIPC socket * @sock: socket to destroy * * This routine cleans up any messages that are still queued on the socket. @@ -307,12 +295,12 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, * * Returns 0 on success, errno otherwise */ -static int release(struct socket *sock) +static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct tipc_port *tport; + struct tipc_sock *tsk; + struct tipc_port *port; struct sk_buff *buf; - int res; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -321,7 +309,8 @@ static int release(struct socket *sock) if (sk == NULL) return 0; - tport = tipc_sk_port(sk); + tsk = tipc_sk(sk); + port = &tsk->port; lock_sock(sk); /* @@ -338,17 +327,16 @@ static int release(struct socket *sock) if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; - tipc_disconnect(tport->ref); + tipc_port_disconnect(port->ref); } tipc_reject_msg(buf, TIPC_ERR_NO_PORT); } } - /* - * Delete TIPC port; this ensures no more messages are queued - * (also disconnects an active connection & sends a 'FIN-' to peer) + /* Destroy TIPC port; also disconnects an active connection and + * sends a 'FIN-' to peer. */ - res = tipc_deleteport(tport); + tipc_port_destroy(port); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -360,11 +348,11 @@ static int release(struct socket *sock) sock_put(sk); sock->sk = NULL; - return res; + return 0; } /** - * bind - associate or disassocate TIPC name(s) with a socket + * tipc_bind - associate or disassocate TIPC name(s) with a socket * @sock: socket structure * @uaddr: socket address describing name(s) and desired operation * @uaddr_len: size of socket address data structure @@ -378,16 +366,17 @@ static int release(struct socket *sock) * NOTE: This routine doesn't need to take the socket lock since it doesn't * access any non-constant socket information. */ -static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) +static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, + int uaddr_len) { struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_port *tport = tipc_sk_port(sock->sk); + struct tipc_sock *tsk = tipc_sk(sk); int res = -EINVAL; lock_sock(sk); if (unlikely(!uaddr_len)) { - res = tipc_withdraw(tport, 0, NULL); + res = tipc_withdraw(&tsk->port, 0, NULL); goto exit; } @@ -415,15 +404,15 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) } res = (addr->scope > 0) ? - tipc_publish(tport, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq); + tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) : + tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq); exit: release_sock(sk); return res; } /** - * get_name - get port ID of socket or peer socket + * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address * @uaddr_len: area for returned length of socket address @@ -435,21 +424,21 @@ exit: * accesses socket information that is unchanging (or which changes in * a completely predictable manner). */ -static int get_name(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) +static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsock = tipc_sk(sock->sk); + struct tipc_sock *tsk = tipc_sk(sock->sk); memset(addr, 0, sizeof(*addr)); if (peer) { if ((sock->state != SS_CONNECTED) && ((peer != 2) || (sock->state != SS_DISCONNECTING))) return -ENOTCONN; - addr->addr.id.ref = tsock->peer_name.ref; - addr->addr.id.node = tsock->peer_name.node; + addr->addr.id.ref = tipc_port_peerport(&tsk->port); + addr->addr.id.node = tipc_port_peernode(&tsk->port); } else { - addr->addr.id.ref = tsock->p->ref; + addr->addr.id.ref = tsk->port.ref; addr->addr.id.node = tipc_own_addr; } @@ -463,7 +452,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, } /** - * poll - read and possibly block on pollmask + * tipc_poll - read and possibly block on pollmask * @file: file structure associated with the socket * @sock: socket for which to calculate the poll bits * @wait: ??? @@ -502,22 +491,23 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static unsigned int poll(struct file *file, struct socket *sock, - poll_table *wait) +static unsigned int tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); u32 mask = 0; sock_poll_wait(file, sk_sleep(sk), wait); switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tipc_sk_port(sk)->congested) + if (!tsk->port.congested) mask |= POLLOUT; break; case SS_READY: case SS_CONNECTED: - if (!tipc_sk_port(sk)->congested) + if (!tsk->port.congested) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -567,7 +557,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); DEFINE_WAIT(wait); int done; @@ -583,14 +573,15 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tport->congested); + done = sk_wait_event(sk, timeo_p, !tsk->port.congested); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } + /** - * send_msg - send message in connectionless manner + * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send @@ -603,11 +594,12 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int send_msg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); int needs_conn; long timeo; @@ -634,13 +626,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = -EISCONN; goto exit; } - if (tport->published) { + if (tsk->port.published) { res = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { - tport->conn_type = dest->addr.name.name.type; - tport->conn_instance = dest->addr.name.name.instance; + tsk->port.conn_type = dest->addr.name.name.type; + tsk->port.conn_instance = dest->addr.name.name.instance; } /* Abort any pending connection attempts (very unlikely) */ @@ -653,13 +645,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = dest_name_check(dest, m); if (res) break; - res = tipc_send2name(tport->ref, + res = tipc_send2name(port, &dest->addr.name.name, dest->addr.name.domain, m->msg_iov, total_len); } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(tport->ref, + res = tipc_send2port(port, &dest->addr.id, m->msg_iov, total_len); @@ -671,10 +663,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = dest_name_check(dest, m); if (res) break; - res = tipc_multicast(tport->ref, - &dest->addr.nameseq, - m->msg_iov, - total_len); + res = tipc_port_mcast_xmit(port, + &dest->addr.nameseq, + m->msg_iov, + total_len); } if (likely(res != -ELINKCONG)) { if (needs_conn && (res >= 0)) @@ -695,7 +687,8 @@ exit: static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; DEFINE_WAIT(wait); int done; @@ -714,14 +707,14 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, - (!tport->congested || !tport->connected)); + (!port->congested || !port->connected)); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } /** - * send_packet - send a connection-oriented message + * tipc_send_packet - send a connection-oriented message * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send @@ -731,18 +724,18 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); int res = -EINVAL; long timeo; /* Handle implied connection establishment */ if (unlikely(dest)) - return send_msg(iocb, sock, m, total_len); + return tipc_sendmsg(iocb, sock, m, total_len); if (total_len > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; @@ -760,7 +753,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); do { - res = tipc_send(tport->ref, m->msg_iov, total_len); + res = tipc_send(&tsk->port, m->msg_iov, total_len); if (likely(res != -ELINKCONG)) break; res = tipc_wait_for_sndpkt(sock, &timeo); @@ -774,7 +767,7 @@ exit: } /** - * send_stream - send stream-oriented data + * tipc_send_stream - send stream-oriented data * @iocb: (unused) * @sock: socket structure * @m: data to send @@ -785,11 +778,11 @@ exit: * Returns the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); struct msghdr my_msg; struct iovec my_iov; struct iovec *curr_iov; @@ -806,7 +799,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_UNCONNECTED) - res = send_packet(NULL, sock, m, total_len); + res = tipc_send_packet(NULL, sock, m, total_len); else res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; goto exit; @@ -837,21 +830,22 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, my_msg.msg_name = NULL; bytes_sent = 0; - hdr_size = msg_hdr_sz(&tport->phdr); + hdr_size = msg_hdr_sz(&tsk->port.phdr); while (curr_iovlen--) { curr_start = curr_iov->iov_base; curr_left = curr_iov->iov_len; while (curr_left) { - bytes_to_send = tport->max_pkt - hdr_size; + bytes_to_send = tsk->port.max_pkt - hdr_size; if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) bytes_to_send = TIPC_MAX_USER_MSG_SIZE; if (curr_left < bytes_to_send) bytes_to_send = curr_left; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; - res = send_packet(NULL, sock, &my_msg, bytes_to_send); + res = tipc_send_packet(NULL, sock, &my_msg, + bytes_to_send); if (res < 0) { if (bytes_sent) res = bytes_sent; @@ -872,27 +866,25 @@ exit: /** * auto_connect - complete connection setup to a remote port - * @sock: socket structure + * @tsk: tipc socket structure * @msg: peer's response message * * Returns 0 on success, errno otherwise */ -static int auto_connect(struct socket *sock, struct tipc_msg *msg) +static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg) { - struct tipc_sock *tsock = tipc_sk(sock->sk); - struct tipc_port *p_ptr; + struct tipc_port *port = &tsk->port; + struct socket *sock = tsk->sk.sk_socket; + struct tipc_portid peer; - tsock->peer_name.ref = msg_origport(msg); - tsock->peer_name.node = msg_orignode(msg); - p_ptr = tipc_port_deref(tsock->p->ref); - if (!p_ptr) - return -EINVAL; + peer.ref = msg_origport(msg); + peer.node = msg_orignode(msg); - __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name); + __tipc_port_connect(port->ref, port, &peer); if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE) return -EINVAL; - msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg)); + msg_set_importance(&port->phdr, (u32)msg_importance(msg)); sock->state = SS_CONNECTED; return 0; } @@ -999,7 +991,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sock->state == SS_DISCONNECTING) { err = -ENOTCONN; break; @@ -1023,7 +1015,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) } /** - * recv_msg - receive packet-oriented message + * tipc_recvmsg - receive packet-oriented message * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area @@ -1034,11 +1026,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) * * Returns size of returned message data, errno otherwise */ -static int recv_msg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t buf_len, int flags) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1081,7 +1074,7 @@ restart: set_orig_addr(m, msg); /* Capture ancillary data (optional) */ - res = anc_data_recv(m, msg, tport); + res = anc_data_recv(m, msg, port); if (res) goto exit; @@ -1107,8 +1100,8 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(tport->ref, tport->conn_unacked); + (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } exit: @@ -1117,7 +1110,7 @@ exit: } /** - * recv_stream - receive stream-oriented data + * tipc_recv_stream - receive stream-oriented data * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area @@ -1128,11 +1121,12 @@ exit: * * Returns size of returned message data, errno otherwise */ -static int recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t buf_len, int flags) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1177,7 +1171,7 @@ restart: /* Optionally capture sender's address & ancillary data of first msg */ if (sz_copied == 0) { set_orig_addr(m, msg); - res = anc_data_recv(m, msg, tport); + res = anc_data_recv(m, msg, port); if (res) goto exit; } @@ -1215,8 +1209,8 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(tport->ref, tport->conn_unacked); + if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) + tipc_acknowledge(port->ref, port->conn_unacked); advance_rx_queue(sk); } @@ -1254,7 +1248,7 @@ static void tipc_write_space(struct sock *sk) * @sk: socket * @len: the length of messages */ -static void tipc_data_ready(struct sock *sk, int len) +static void tipc_data_ready(struct sock *sk) { struct socket_wq *wq; @@ -1268,17 +1262,19 @@ static void tipc_data_ready(struct sock *sk, int len) /** * filter_connect - Handle all incoming messages for a connection-based socket - * @tsock: TIPC socket + * @tsk: TIPC socket * @msg: message * * Returns TIPC error status code and socket error status code * once it encounters some errors */ -static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) +static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { - struct socket *sock = tsock->sk.sk_socket; + struct sock *sk = &tsk->sk; + struct tipc_port *port = &tsk->port; + struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - struct sock *sk = &tsock->sk; + u32 retval = TIPC_ERR_NO_PORT; int res; @@ -1288,10 +1284,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) switch ((int)sock->state) { case SS_CONNECTED: /* Accept only connection-based messages sent by peer */ - if (msg_connected(msg) && tipc_port_peer_msg(tsock->p, msg)) { + if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) { if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - __tipc_disconnect(tsock->p); + __tipc_port_disconnect(port); } retval = TIPC_OK; } @@ -1308,7 +1304,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) if (unlikely(!msg_connected(msg))) break; - res = auto_connect(sock, msg); + res = auto_connect(tsk, msg); if (res) { sock->state = SS_DISCONNECTING; sk->sk_err = -res; @@ -1387,6 +1383,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; + struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); u32 res = TIPC_OK; @@ -1399,7 +1396,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) if (msg_connected(msg)) return TIPC_ERR_NO_PORT; } else { - res = filter_connect(tipc_sk(sk), &buf); + res = filter_connect(tsk, &buf); if (res != TIPC_OK || buf == NULL) return res; } @@ -1413,7 +1410,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) __skb_queue_tail(&sk->sk_receive_queue, buf); skb_set_owner_r(buf, sk); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return TIPC_OK; } @@ -1437,17 +1434,16 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf) } /** - * dispatch - handle incoming message - * @tport: TIPC port that received message + * tipc_sk_rcv - handle incoming message + * @sk: socket receiving message * @buf: message * * Called with port lock already taken. * * Returns TIPC error status code (TIPC_OK if message is not to be rejected) */ -static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) +u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) { - struct sock *sk = tport->sk; u32 res; /* @@ -1470,19 +1466,6 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) return res; } -/** - * wakeupdispatch - wake up port after congestion - * @tport: port to wakeup - * - * Called with port lock already taken. - */ -static void wakeupdispatch(struct tipc_port *tport) -{ - struct sock *sk = tport->sk; - - sk->sk_write_space(sk); -} - static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; @@ -1506,7 +1489,7 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) } /** - * connect - establish a connection to another TIPC port + * tipc_connect - establish a connection to another TIPC port * @sock: socket structure * @dest: socket address for destination port * @destlen: size of socket address data structure @@ -1514,8 +1497,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) * * Returns 0 on success, errno otherwise */ -static int connect(struct socket *sock, struct sockaddr *dest, int destlen, - int flags) +static int tipc_connect(struct socket *sock, struct sockaddr *dest, + int destlen, int flags) { struct sock *sk = sock->sk; struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; @@ -1556,7 +1539,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (!timeout) m.msg_flags = MSG_DONTWAIT; - res = send_msg(NULL, sock, &m, 0); + res = tipc_sendmsg(NULL, sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; @@ -1587,13 +1570,13 @@ exit: } /** - * listen - allow socket to listen for incoming connections + * tipc_listen - allow socket to listen for incoming connections * @sock: socket structure * @len: (unused) * * Returns 0 on success, errno otherwise */ -static int listen(struct socket *sock, int len) +static int tipc_listen(struct socket *sock, int len) { struct sock *sk = sock->sk; int res; @@ -1625,7 +1608,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) for (;;) { prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); @@ -1648,20 +1631,20 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) } /** - * accept - wait for connection request + * tipc_accept - wait for connection request * @sock: listening socket * @newsock: new socket that is to be connected * @flags: file-related flags associated with socket * * Returns 0 on success, errno otherwise */ -static int accept(struct socket *sock, struct socket *new_sock, int flags) +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) { struct sock *new_sk, *sk = sock->sk; struct sk_buff *buf; - struct tipc_sock *new_tsock; - struct tipc_port *new_tport; + struct tipc_port *new_port; struct tipc_msg *msg; + struct tipc_portid peer; u32 new_ref; long timeo; int res; @@ -1672,7 +1655,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) res = -EINVAL; goto exit; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); res = tipc_wait_for_accept(sock, timeo); if (res) @@ -1685,9 +1667,8 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; new_sk = new_sock->sk; - new_tsock = tipc_sk(new_sk); - new_tport = new_tsock->p; - new_ref = new_tport->ref; + new_port = &tipc_sk(new_sk)->port; + new_ref = new_port->ref; msg = buf_msg(buf); /* we lock on new_sk; but lockdep sees the lock on sk */ @@ -1700,15 +1681,15 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) reject_rx_queue(new_sk); /* Connect new socket to it's peer */ - new_tsock->peer_name.ref = msg_origport(msg); - new_tsock->peer_name.node = msg_orignode(msg); - tipc_connect(new_ref, &new_tsock->peer_name); + peer.ref = msg_origport(msg); + peer.node = msg_orignode(msg); + tipc_port_connect(new_ref, &peer); new_sock->state = SS_CONNECTED; - tipc_set_portimportance(new_ref, msg_importance(msg)); + tipc_port_set_importance(new_port, msg_importance(msg)); if (msg_named(msg)) { - new_tport->conn_type = msg_nametype(msg); - new_tport->conn_instance = msg_nameinst(msg); + new_port->conn_type = msg_nametype(msg); + new_port->conn_instance = msg_nameinst(msg); } /* @@ -1719,21 +1700,20 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) struct msghdr m = {NULL,}; advance_rx_queue(sk); - send_packet(NULL, new_sock, &m, 0); + tipc_send_packet(NULL, new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } release_sock(new_sk); - exit: release_sock(sk); return res; } /** - * shutdown - shutdown socket connection + * tipc_shutdown - shutdown socket connection * @sock: socket structure * @how: direction to close (must be SHUT_RDWR) * @@ -1741,10 +1721,11 @@ exit: * * Returns 0 on success, errno otherwise */ -static int shutdown(struct socket *sock, int how) +static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; struct sk_buff *buf; int res; @@ -1765,10 +1746,10 @@ restart: kfree_skb(buf); goto restart; } - tipc_disconnect(tport->ref); + tipc_port_disconnect(port->ref); tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); } else { - tipc_shutdown(tport->ref); + tipc_port_shutdown(port->ref); } sock->state = SS_DISCONNECTING; @@ -1794,7 +1775,7 @@ restart: } /** - * setsockopt - set socket option + * tipc_setsockopt - set socket option * @sock: socket structure * @lvl: option level * @opt: option identifier @@ -1806,11 +1787,12 @@ restart: * * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, - unsigned int ol) +static int tipc_setsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, unsigned int ol) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; u32 value; int res; @@ -1828,16 +1810,16 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, switch (opt) { case TIPC_IMPORTANCE: - res = tipc_set_portimportance(tport->ref, value); + tipc_port_set_importance(port, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) - res = tipc_set_portunreliable(tport->ref, value); + tipc_port_set_unreliable(port, value); else res = -ENOPROTOOPT; break; case TIPC_DEST_DROPPABLE: - res = tipc_set_portunreturnable(tport->ref, value); + tipc_port_set_unreturnable(port, value); break; case TIPC_CONN_TIMEOUT: tipc_sk(sk)->conn_timeout = value; @@ -1853,7 +1835,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, } /** - * getsockopt - get socket option + * tipc_getsockopt - get socket option * @sock: socket structure * @lvl: option level * @opt: option identifier @@ -1865,11 +1847,12 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, * * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, - int __user *ol) +static int tipc_getsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, int __user *ol) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; int len; u32 value; int res; @@ -1886,13 +1869,13 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, switch (opt) { case TIPC_IMPORTANCE: - res = tipc_portimportance(tport->ref, &value); + value = tipc_port_importance(port); break; case TIPC_SRC_DROPPABLE: - res = tipc_portunreliable(tport->ref, &value); + value = tipc_port_unreliable(port); break; case TIPC_DEST_DROPPABLE: - res = tipc_portunreturnable(tport->ref, &value); + value = tipc_port_unreturnable(port); break; case TIPC_CONN_TIMEOUT: value = tipc_sk(sk)->conn_timeout; @@ -1927,20 +1910,20 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, static const struct proto_ops msg_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, - .getname = get_name, - .poll = poll, + .getname = tipc_getname, + .poll = tipc_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_msg, - .recvmsg = recv_msg, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_sendmsg, + .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -1948,20 +1931,20 @@ static const struct proto_ops msg_ops = { static const struct proto_ops packet_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, .socketpair = sock_no_socketpair, - .accept = accept, - .getname = get_name, - .poll = poll, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, .ioctl = sock_no_ioctl, - .listen = listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_packet, - .recvmsg = recv_msg, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_packet, + .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -1969,20 +1952,20 @@ static const struct proto_ops packet_ops = { static const struct proto_ops stream_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, .socketpair = sock_no_socketpair, - .accept = accept, - .getname = get_name, - .poll = poll, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, .ioctl = sock_no_ioctl, - .listen = listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_stream, - .recvmsg = recv_stream, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_stream, + .recvmsg = tipc_recv_stream, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -2027,8 +2010,6 @@ int tipc_socket_init(void) proto_unregister(&tipc_proto); goto out; } - - sockets_enabled = 1; out: return res; } @@ -2038,10 +2019,6 @@ int tipc_socket_init(void) */ void tipc_socket_stop(void) { - if (!sockets_enabled) - return; - - sockets_enabled = 0; sock_unregister(tipc_family_ops.family); proto_unregister(&tipc_proto); } diff --git a/net/tipc/socket.h b/net/tipc/socket.h new file mode 100644 index 00000000000..74e5c7f195a --- /dev/null +++ b/net/tipc/socket.h @@ -0,0 +1,72 @@ +/* net/tipc/socket.h: Include file for TIPC socket code + * + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SOCK_H +#define _TIPC_SOCK_H + +#include "port.h" +#include <net/sock.h> + +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @port: port - interacts with 'sk' and with the rest of the TIPC stack + * @peer_name: the peer of the connection, if any + * @conn_timeout: the time we can wait for an unresponded setup request + */ + +struct tipc_sock { + struct sock sk; + struct tipc_port port; + unsigned int conn_timeout; +}; + +static inline struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +static inline struct tipc_sock *tipc_port_to_sock(const struct tipc_port *port) +{ + return container_of(port, struct tipc_sock, port); +} + +static inline void tipc_sock_wakeup(struct tipc_sock *tsk) +{ + tsk->sk.sk_write_space(&tsk->sk); +} + +u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf); + +#endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7cb0bd5b117..642437231ad 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, { struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; - int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); sub->evt.found_lower = htohl(found_lower, sub->swap); sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); - if (ret < 0) - pr_err("Sending subscription event failed, no memory\n"); + tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, + msg_sect.iov_len); } /** @@ -153,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub) /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); - /* Validate if the connection related to the subscriber is - * closed (in case subscriber is terminating) - */ - if (subscriber->conid == 0) { - spin_unlock_bh(&subscriber->lock); - return; - } - /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); @@ -215,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber) spin_lock_bh(&subscriber->lock); - /* Invalidate subscriber reference */ - subscriber->conid = 0; - /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { @@ -278,9 +263,9 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. */ -static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) -{ +static int subscr_subscribe(struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) { struct tipc_subscription *sub; int swap; @@ -291,23 +276,21 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); subscr_cancel(s, subscriber); - return NULL; + return 0; } /* Refuse subscription if global limit exceeded */ if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); - subscr_terminate(subscriber); - return NULL; + return -ENOMEM; } /* Initialize subscription object */ @@ -321,8 +304,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (sub->seq.lower > sub->seq.upper)) { pr_warn("Subscription rejected, illegal request\n"); kfree(sub); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); @@ -335,8 +317,8 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (Handler)subscr_timeout, (unsigned long)sub); k_start_timer(&sub->timer, sub->timeout); } - - return sub; + *sub_p = sub; + return 0; } /* Handle one termination request for the subscriber */ @@ -350,10 +332,14 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscription *sub; + struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + spin_unlock_bh(&subscriber->lock); + subscr_terminate(subscriber); + return; + } if (sub) tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 29fc8bee970..bb7e8ba821f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } @@ -1218,7 +1217,7 @@ restart: __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); - other->sk_data_ready(other, 0); + other->sk_data_ready(other); sock_put(other); return 0; @@ -1601,7 +1600,7 @@ restart: if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); - other->sk_data_ready(other, len); + other->sk_data_ready(other); sock_put(other); scm_destroy(siocb->scm); return len; @@ -1707,7 +1706,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); - other->sk_data_ready(other, size); + other->sk_data_ready(other); sent += size; } @@ -1788,8 +1787,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } @@ -1914,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; @@ -1929,7 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1941,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 9a730744e7b..9b7f207f2be 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -315,7 +315,7 @@ vmci_transport_handle_wrote(struct sock *sk, struct vsock_sock *vsk = vsock_sk(sk); PKT_FIELD(vsk, sent_waiting_read) = false; #endif - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } static void vmci_transport_notify_pkt_socket_init(struct sock *sk) diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index 622bd7aa101..dc9c7929a2f 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -92,7 +92,7 @@ vmci_transport_handle_wrote(struct sock *sk, bool bottom_half, struct sockaddr_vm *dst, struct sockaddr_vm *src) { - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } static void vsock_block_update_write_window(struct sock *sk) @@ -290,7 +290,7 @@ vmci_transport_notify_pkt_recv_post_dequeue( /* See the comment in * vmci_transport_notify_pkt_send_post_enqueue(). */ - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } return err; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 16d08b39921..405f3c4cf70 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,6 +95,43 @@ config CFG80211_CERTIFICATION_ONUS you are a wireless researcher and are working in a controlled and approved environment by your local regulatory agency. +config CFG80211_REG_CELLULAR_HINTS + bool "cfg80211 regulatory support for cellular base station hints" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for parsing regulatory hints + from cellular base stations. If enabled and at least one driver + claims support for parsing cellular base station hints the + regulatory core will allow and parse these regulatory hints. + The regulatory core will only apply these regulatory hints on + drivers that support this feature. You should only enable this + feature if you have tested and validated this feature on your + systems. + +config CFG80211_REG_RELAX_NO_IR + bool "cfg80211 support for NO_IR relaxation" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for relaxation of the NO_IR flag for + situations that certain regulatory bodies have provided clarifications + on how relaxation can occur. This feature has an inherent dependency on + userspace features which must have been properly tested and as such is + not enabled by default. + + A relaxation feature example is allowing the operation of a P2P group + owner (GO) on channels marked with NO_IR if there is an additional BSS + interface which associated to an AP which userspace assumes or confirms + to be an authorized master, i.e., with radar detection support and DFS + capabilities. However, note that in order to not create daisy chain + scenarios, this relaxation is not allowed in cases that the BSS client + is associated to P2P GO and in addition the P2P GO instantiated on + a channel due to this relaxation should not allow connection from + non P2P clients. + + The regulatory core will apply these relaxations only for drivers that + support this feature by declaring the appropriate channel flags and + capabilities in their registration flow. + config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 68602be07cc..bdad1f95156 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -6,8 +6,8 @@ #include "rdev-ops.h" -static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -30,20 +30,21 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, memset(&wdev->chandef, 0, sizeof(wdev->chandef)); wdev->ssid_len = 0; rdev_set_qos_map(rdev, dev, NULL); - nl80211_send_ap_stopped(wdev); + if (notify) + nl80211_send_ap_stopped(wdev); } return err; } int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_stop_ap(rdev, dev); + err = __cfg80211_stop_ap(rdev, dev, notify); wdev_unlock(wdev); return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index f8ab7df1ab0..992b34070bc 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { int width; - int r; + int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; - width = cfg80211_chandef_get_width(chandef); - if (width < 0) - return -EINVAL; + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; - r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, - width); - if (r) - return r; + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq1, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - if (!chandef->center_freq2) - return 0; + if (!chandef->center_freq2) + return 0; - return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, - width); + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq2, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); + + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); @@ -490,6 +519,62 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, return r; } +static unsigned int cfg80211_get_chans_dfs_cac_time(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 start_freq, end_freq, freq; + unsigned int dfs_cac_ms = 0; + + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + for (freq = start_freq; freq <= end_freq; freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return 0; + + if (c->flags & IEEE80211_CHAN_DISABLED) + return 0; + + if (!(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + if (c->dfs_cac_ms > dfs_cac_ms) + dfs_cac_ms = c->dfs_cac_ms; + } + + return dfs_cac_ms; +} + +unsigned int +cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + unsigned int t1 = 0, t2 = 0; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return 0; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return 0; + + t1 = cfg80211_get_chans_dfs_cac_time(wiphy, + chandef->center_freq1, + width); + + if (!chandef->center_freq2) + return t1; + + t2 = cfg80211_get_chans_dfs_cac_time(wiphy, + chandef->center_freq2, + width); + + return max(t1, t2); +} static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, @@ -531,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, width = 5; break; case NL80211_CHAN_WIDTH_10: + prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; case NL80211_CHAN_WIDTH_20_NOHT: + prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: @@ -605,17 +692,111 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +/* + * For GO only, check if the channel can be used under permissive conditions + * mandated by the some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * associated to an AP on the same channel or on the same UNII band + * (assuming that the AP is an authorized master). + * In addition allow the GO to operate on a channel on which indoor operation is + * allowed, iff we are currently operating in an indoor environment. + */ +static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev_iter; + struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + + ASSERT_RTNL(); + + if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) || + !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) + return false; + + if (regulatory_indoor_allowed() && + (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + return true; + + if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + return false; + + /* + * Generally, it is possible to rely on another device/driver to allow + * the GO concurrent relaxation, however, since the device can further + * enforce the relaxation (by doing a similar verifications as this), + * and thus fail the GO instantiation, consider only the interfaces of + * the current registered device. + */ + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + struct ieee80211_channel *other_chan = NULL; + int r1, r2; + + if (wdev_iter->iftype != NL80211_IFTYPE_STATION || + !netif_running(wdev_iter->netdev)) + continue; + + wdev_lock(wdev_iter); + if (wdev_iter->current_bss) + other_chan = wdev_iter->current_bss->pub.channel; + wdev_unlock(wdev_iter); + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != IEEE80211_BAND_5GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) { + /* + * At some locations channels 149-165 are considered a + * bundle, but at other locations, e.g., Indonesia, + * channels 149-161 are considered a bundle while + * channel 165 is left out and considered to be in a + * different bundle. Thus, in case that there is a + * station interface connected to an AP on channel 165, + * it is assumed that channels 149-161 are allowed for + * GO operations. However, having a station interface + * connected to an AP on channels 149-161, does not + * allow GO operation on channel 165. + */ + if (chan->center_freq == 5825 && + other_chan->center_freq != 5825) + continue; + return true; + } + } + + return false; +} + bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); + + /* + * Under certain conditions suggested by the some regulatory bodies + * a GO can operate on channels marked with IEEE80211_NO_IR + * so set this flag only if such relaxations are not enabled and + * the conditions are not met. + */ + if (iftype != NL80211_IFTYPE_P2P_GO || + !cfg80211_go_permissive_chan(rdev, chandef->chan)) + prohibited_flags |= IEEE80211_CHAN_NO_IR; - if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ prohibited_flags = IEEE80211_CHAN_DISABLED; @@ -645,6 +826,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, enum cfg80211_chan_mode *chanmode, u8 *radar_detect) { + int ret; + *chan = NULL; *chanmode = CHAN_MODE_UNDEFINED; @@ -687,8 +870,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; @@ -697,25 +883,22 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: - /* these interface types don't really have a channel */ - return; case NL80211_IFTYPE_P2P_DEVICE: - if (wdev->wiphy->features & - NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL) - *chanmode = CHAN_MODE_EXCLUSIVE; + /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); } - - return; } diff --git a/net/wireless/core.c b/net/wireless/core.c index b5ff39a6f6e..a1c40654dd9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -69,7 +69,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); return rdev->wiphy_idx; } @@ -130,7 +130,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, newname)) pr_err("failed to rename debugfs dir to %s!\n", newname); - nl80211_notify_dev_rename(rdev); + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); return 0; } @@ -203,19 +203,19 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && - !rdev->scan_req->notified); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, false); + } } -static int cfg80211_rfkill_set_block(void *data, bool blocked) +void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = data; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct wireless_dev *wdev; - if (!blocked) - return 0; - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -231,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) break; } } +} +EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); + +static int cfg80211_rfkill_set_block(void *data, bool blocked) +{ + struct cfg80211_registered_device *rdev = data; + + if (!blocked) + return 0; + rtnl_lock(); + cfg80211_shutdown_all_interfaces(&rdev->wiphy); rtnl_unlock(); return 0; @@ -257,6 +268,45 @@ static void cfg80211_event_work(struct work_struct *work) rtnl_unlock(); } +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_iface_destroy *item; + + ASSERT_RTNL(); + + spin_lock_irq(&rdev->destroy_list_lock); + while ((item = list_first_entry_or_null(&rdev->destroy_list, + struct cfg80211_iface_destroy, + list))) { + struct wireless_dev *wdev, *tmp; + u32 nlportid = item->nlportid; + + list_del(&item->list); + kfree(item); + spin_unlock_irq(&rdev->destroy_list_lock); + + list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) { + if (nlportid == wdev->owner_nlportid) + rdev_del_virtual_intf(rdev, wdev); + } + + spin_lock_irq(&rdev->destroy_list_lock); + } + spin_unlock_irq(&rdev->destroy_list_lock); +} + +static void cfg80211_destroy_iface_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + destroy_work); + + rtnl_lock(); + cfg80211_destroy_ifaces(rdev); + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -315,6 +365,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + INIT_LIST_HEAD(&rdev->destroy_list); + spin_lock_init(&rdev->destroy_list_lock); + INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; #endif @@ -348,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.max_num_csa_counters = 1; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); @@ -393,10 +449,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) for (j = 0; j < c->n_limits; j++) { u16 types = c->limits[j].types; - /* - * interface types shouldn't overlap, this is - * used in cfg80211_can_change_interface() - */ + /* interface types shouldn't overlap */ if (WARN_ON(types & all_iftypes)) return -EINVAL; all_iftypes |= types; @@ -432,7 +485,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) int wiphy_register(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; enum ieee80211_band band; struct ieee80211_supported_band *sband; @@ -440,9 +493,6 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; - /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ - wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; - /* * There are major locking problems in nl80211/mac80211 for CSA, * disable for all drivers until this has been reworked. @@ -610,13 +660,15 @@ int wiphy_register(struct wiphy *wiphy) return res; } + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + return 0; } EXPORT_SYMBOL(wiphy_register); void wiphy_rfkill_start_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (!rdev->ops->rfkill_poll) return; @@ -627,7 +679,7 @@ EXPORT_SYMBOL(wiphy_rfkill_start_polling); void wiphy_rfkill_stop_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); rfkill_pause_polling(rdev->rfkill); } @@ -635,7 +687,7 @@ EXPORT_SYMBOL(wiphy_rfkill_stop_polling); void wiphy_unregister(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); wait_event(rdev->dev_wait, ({ int __count; @@ -648,9 +700,10 @@ void wiphy_unregister(struct wiphy *wiphy) rfkill_unregister(rdev->rfkill); rtnl_lock(); + nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; - BUG_ON(!list_empty(&rdev->wdev_list)); + WARN_ON(!list_empty(&rdev->wdev_list)); /* * First remove the hardware from everywhere, this makes @@ -675,6 +728,7 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); + flush_work(&rdev->destroy_work); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -707,7 +761,7 @@ EXPORT_SYMBOL(wiphy_free); void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state(rdev->rfkill, blocked)) schedule_work(&rdev->rfkill_sync); @@ -716,7 +770,7 @@ EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); void cfg80211_unregister_wdev(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); ASSERT_RTNL(); @@ -751,23 +805,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; ASSERT_RTNL(); + ASSERT_WDEV_LOCK(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, dev, true); + __cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); - wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; @@ -776,34 +830,60 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); + __cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev); + __cfg80211_stop_ap(rdev, dev, true); break; default: break; } +} - wdev->beacon_interval = 0; +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + wdev_lock(wdev); + __cfg80211_leave(rdev, wdev); + wdev_unlock(wdev); } +void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + trace_cfg80211_stop_iface(wiphy, wdev); + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_STOPPED; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_stop_iface); + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; - int ret; if (!wdev) return NOTIFY_DONE; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED); @@ -859,8 +939,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && - !rdev->scan_req->notified); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, false); + } if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { @@ -958,13 +1041,14 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); - ret = cfg80211_can_add_interface(rdev, wdev->iftype); - if (ret) - return notifier_from_errno(ret); + if (rfkill_blocked(rdev->rfkill)) + return notifier_from_errno(-ERFKILL); break; + default: + return NOTIFY_DONE; } - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block cfg80211_netdev_notifier = { diff --git a/net/wireless/core.h b/net/wireless/core.h index 9895ab16c05..e9afbf10e75 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -62,6 +62,7 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ + struct sk_buff *scan_msg; struct cfg80211_sched_scan_request *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; @@ -79,13 +80,17 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; + spinlock_t destroy_list_lock; + struct list_head destroy_list; + struct work_struct destroy_work; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline -struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) +struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); @@ -165,7 +170,6 @@ static inline void wdev_unlock(struct wireless_dev *wdev) mutex_unlock(&wdev->mtx); } -#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL() #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) @@ -181,6 +185,7 @@ enum cfg80211_event_type { EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, + EVENT_STOPPED, }; struct cfg80211_event { @@ -232,6 +237,13 @@ struct cfg80211_beacon_registration { u32 nlportid; }; +struct cfg80211_iface_destroy { + struct list_head list; + u32 nlportid; +}; + +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -240,15 +252,11 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void cfg80211_bss_expire(struct cfg80211_registered_device *dev); -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); /* IBSS */ -int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params, - struct cfg80211_cached_keys *connkeys); int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params, @@ -274,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -281,8 +291,10 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* AP */ +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev); + struct net_device *dev, bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, @@ -363,7 +375,8 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct work_struct *wk); -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev); +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, + bool send_message); void __cfg80211_sched_scan_results(struct work_struct *wk); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated); @@ -400,35 +413,9 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, void cfg80211_dfs_channels_update_work(struct work_struct *work); - -static inline int -cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED, 0); -} - -static inline int -cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, - enum nl80211_iftype iftype) -{ - if (rfkill_blocked(rdev->rfkill)) - return -ERFKILL; - - return cfg80211_can_change_interface(rdev, NULL, iftype); -} - -static inline int -cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode, 0); -} +unsigned int +cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { @@ -459,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index e37862f1b12..d4860bfc020 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -43,7 +43,7 @@ static void cfg80211_get_ringparam(struct net_device *dev, struct ethtool_ringparam *rp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); memset(rp, 0, sizeof(*rp)); @@ -56,7 +56,7 @@ static int cfg80211_set_ringparam(struct net_device *dev, struct ethtool_ringparam *rp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) return -EINVAL; @@ -70,7 +70,7 @@ static int cfg80211_set_ringparam(struct net_device *dev, static int cfg80211_get_sset_count(struct net_device *dev, int sset) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_sset_count) return rdev_get_et_sset_count(rdev, dev, sset); return -EOPNOTSUPP; @@ -80,7 +80,7 @@ static void cfg80211_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_stats) rdev_get_et_stats(rdev, dev, stats, data); } @@ -88,7 +88,7 @@ static void cfg80211_get_stats(struct net_device *dev, static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (rdev->ops->get_et_strings) rdev_get_et_strings(rdev, dev, sset, data); } diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 9a8217d2a90..40c37fc5b67 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -66,19 +66,15 @@ function parse_reg_rule() units = $8 sub(/\)/, "", units) sub(/,/, "", units) + dfs_cac = $9 if (units == "mW") { - if (power == 100) { - power = 20 - } else if (power == 200) { - power = 23 - } else if (power == 500) { - power = 27 - } else if (power == 1000) { - power = 30 - } else { - print "Unknown power value in database!" - } + power = 10 * log(power)/log(10) + } else { + dfs_cac = $8 } + sub(/,/, "", dfs_cac) + sub(/\(/, "", dfs_cac) + sub(/\)/, "", dfs_cac) flagstr = "" for (i=8; i<=NF; i++) flagstr = flagstr $i @@ -105,11 +101,13 @@ function parse_reg_rule() flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " } else if (flagarray[arg] == "NO-IR") { flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " + } else if (flagarray[arg] == "AUTO-BW") { + flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | " } } flags = flags "0" - printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags + printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags rules++ } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 1470b90e438..8f345da3ea5 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -45,7 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_upload_connect_keys(wdev); - nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, + nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); @@ -58,7 +58,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -82,14 +82,12 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_ibss_joined); -int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params, - struct cfg80211_cached_keys *connkeys) +static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_channel *check_chan; - u8 radar_detect_width = 0; int err; ASSERT_WDEV_LOCK(wdev); @@ -126,29 +124,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - check_chan = params->chandef.chan; - if (params->userspace_handles_dfs) { - /* use channel NULL to check for radar even if the current - * channel is not a radar channel - it might decide to change - * to DFS channel later. - */ - radar_detect_width = BIT(params->chandef.width); - check_chan = NULL; - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - check_chan, - (params->channel_fixed && - !radar_detect_width) - ? CHAN_MODE_SHARED - : CHAN_MODE_EXCLUSIVE, - radar_detect_width); - - if (err) { - wdev->connect_keys = NULL; - return err; - } - err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; @@ -181,7 +156,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; ASSERT_WDEV_LOCK(wdev); @@ -336,7 +311,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; @@ -347,7 +322,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (!rdev->ops->join_ibss) return -EOPNOTSUPP; - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; @@ -421,7 +396,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; @@ -445,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; + memcpy(wdev->ssid, ssid, len); wdev->wext.ibss.ssid = wdev->ssid; - memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); @@ -488,7 +463,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; @@ -506,6 +481,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; + if (bssid && !is_valid_ether_addr(bssid)) + return -EINVAL; + /* both automatic */ if (!bssid && !wdev->wext.ibss.bssid) return 0; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index d42a3fcb2f6..092300b30c3 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -175,22 +174,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, scan_width); } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef, + NL80211_IFTYPE_MESH_POINT)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); - if (err < 0) - return err; - if (err) - radar_detect_width = BIT(setup->chandef.width); - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - setup->chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); @@ -236,11 +223,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - err = cfg80211_can_use_chan(rdev, wdev, chandef->chan, - CHAN_MODE_SHARED); - if (err) - return err; - err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) @@ -256,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d47c9d127b1..266766b8d80 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -23,7 +23,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); @@ -54,7 +54,7 @@ EXPORT_SYMBOL(cfg80211_rx_assoc_resp); static void cfg80211_process_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(wdev, buf, len); @@ -63,7 +63,7 @@ static void cfg80211_process_auth(struct wireless_dev *wdev, static void cfg80211_process_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); @@ -82,7 +82,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, static void cfg80211_process_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -123,7 +123,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); @@ -136,7 +136,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_send_assoc_timeout(dev, bss->bssid); @@ -172,7 +172,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, const u8 *tsc, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; char *buf = kmalloc(128, gfp); @@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!req.bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, - CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_auth(rdev, dev, &req); -out: cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, if (!req->bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_assoc(rdev, dev, req); if (!err) cfg80211_hold_bss(bss_from_pub(req->bss)); - -out: - if (err) + else cfg80211_put_bss(&rdev->wiphy, req->bss); return err; @@ -414,7 +402,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -473,7 +461,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; spin_lock_bh(&wdev->mgmt_registrations_lock); @@ -620,7 +608,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, const u8 *buf, size_t len, u32 flags, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg; const struct ieee80211_txrx_stypes *stypes = &wiphy->mgmt_stypes[wdev->iftype]; @@ -739,7 +727,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_radar_event(wiphy, chandef); @@ -764,7 +752,7 @@ void cfg80211_cac_event(struct net_device *netdev, { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_cac_event(netdev, event); @@ -778,7 +766,7 @@ void cfg80211_cac_event(struct net_device *netdev, switch (event) { case NL80211_RADAR_CAC_FINISHED: timeout = wdev->cac_start_time + - msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + msecs_to_jiffies(wdev->cac_time_ms); WARN_ON(!time_after_eq(jiffies, timeout)); cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ebea1a197af..ba4f1723c83 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -168,8 +168,8 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) - tmp = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); + tmp = wiphy_to_rdev( + netdev->ieee80211_ptr->wiphy); else tmp = NULL; @@ -371,8 +371,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, - [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 }, - [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 }, + [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY }, + [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, @@ -384,6 +384,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = IEEE80211_QOS_MAP_LEN_MAX }, [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, + [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, + [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, + [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, }; /* policy for the key attributes */ @@ -483,7 +486,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = PTR_ERR(*wdev); goto out_unlock; } - *rdev = wiphy_to_dev((*wdev)->wiphy); + *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; @@ -496,7 +499,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = -ENODEV; goto out_unlock; } - *rdev = wiphy_to_dev(wiphy); + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { @@ -565,6 +568,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct ieee80211_channel *chan, bool large) { + /* Some channels must be completely excluded from the + * list to protect old user-space tools from breaking + */ + if (!large && chan->flags & + (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ)) + return 0; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) goto nla_put_failure; @@ -592,6 +602,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) goto nla_put_failure; + if (nla_put_u32(msg, + NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, + chan->dfs_cac_ms)) + goto nla_put_failure; } } @@ -608,6 +622,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -945,8 +971,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c->max_interfaces)) goto nla_put_failure; if (large && - nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths)) + (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths) || + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + c->radar_detect_regions))) goto nla_put_failure; nla_nest_end(msg, nl_combi); @@ -1001,42 +1029,42 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, } static int nl80211_send_wowlan(struct sk_buff *msg, - struct cfg80211_registered_device *dev, + struct cfg80211_registered_device *rdev, bool large) { struct nlattr *nl_wowlan; - if (!dev->wiphy.wowlan) + if (!rdev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && + if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan->n_patterns) { + if (rdev->wiphy.wowlan->n_patterns) { struct nl80211_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan->n_patterns, - .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, + .max_patterns = rdev->wiphy.wowlan->n_patterns, + .min_pattern_len = rdev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -1044,7 +1072,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } - if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; nla_nest_end(msg, nl_wowlan); @@ -1054,19 +1082,19 @@ static int nl80211_send_wowlan(struct sk_buff *msg, #endif static int nl80211_send_coalesce(struct sk_buff *msg, - struct cfg80211_registered_device *dev) + struct cfg80211_registered_device *rdev) { struct nl80211_coalesce_rule_support rule; - if (!dev->wiphy.coalesce) + if (!rdev->wiphy.coalesce) return 0; - rule.max_rules = dev->wiphy.coalesce->n_rules; - rule.max_delay = dev->wiphy.coalesce->max_delay; - rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns; - rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len; - rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len; - rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset; + rule.max_rules = rdev->wiphy.coalesce->n_rules; + rule.max_delay = rdev->wiphy.coalesce->max_delay; + rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns; + rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len; + rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len; + rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset; if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule)) return -ENOBUFS; @@ -1197,7 +1225,8 @@ struct nl80211_dump_wiphy_state { bool split; }; -static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, +static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct nl80211_dump_wiphy_state *state) { @@ -1209,63 +1238,66 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + rdev->wiphy.mgmt_stypes; u32 features; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (WARN_ON(!state)) return -EINVAL; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, - wiphy_name(&dev->wiphy)) || + wiphy_name(&rdev->wiphy)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, cfg80211_rdev_list_generation)) goto nla_put_failure; + if (cmd != NL80211_CMD_NEW_WIPHY) + goto finish; + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || + rdev->wiphy.retry_short) || nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || + rdev->wiphy.retry_long) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || + rdev->wiphy.frag_threshold) || nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || + rdev->wiphy.rts_threshold) || nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || + rdev->wiphy.coverage_class) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || + rdev->wiphy.max_scan_ssids) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || + rdev->wiphy.max_sched_scan_ssids) || nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || + rdev->wiphy.max_scan_ie_len) || nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || + rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) + rdev->wiphy.max_match_sets)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; state->split_start++; @@ -1273,35 +1305,35 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) + sizeof(u32) * rdev->wiphy.n_cipher_suites, + rdev->wiphy.cipher_suites)) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) + rdev->wiphy.max_num_pmkids)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || + rdev->wiphy.available_antennas_tx) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) + rdev->wiphy.available_antennas_rx)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) + rdev->wiphy.probe_resp_offload)) goto nla_put_failure; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && - dev->ops->get_antenna) { + if ((rdev->wiphy.available_antennas_tx || + rdev->wiphy.available_antennas_rx) && + rdev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); + res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, @@ -1318,7 +1350,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) + rdev->wiphy.interface_modes)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1332,7 +1364,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; - sband = dev->wiphy.bands[band]; + sband = rdev->wiphy.bands[band]; if (!sband) continue; @@ -1409,7 +1441,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, i = 0; #define CMD(op, n) \ do { \ - if (dev->ops->op) { \ + if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ goto nla_put_failure; \ @@ -1433,32 +1465,32 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { + if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || + rdev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { i++; if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; @@ -1468,7 +1500,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); } CMD(set_qos_map, SET_QOS_MAP); @@ -1479,13 +1511,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, #undef CMD - if (dev->ops->connect || dev->ops->auth) { + if (rdev->ops->connect || rdev->ops->auth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - if (dev->ops->disconnect || dev->ops->deauth) { + if (rdev->ops->disconnect || rdev->ops->deauth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) goto nla_put_failure; @@ -1496,14 +1528,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 5: - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + if (rdev->ops->remain_on_channel && + (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) + rdev->wiphy.max_remain_on_channel_duration)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) goto nla_put_failure; @@ -1514,7 +1546,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, state->split)) + if (nl80211_send_wowlan(msg, rdev, state->split)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1524,10 +1556,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) + rdev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, + if (nl80211_put_iface_combinations(&rdev->wiphy, msg, state->split)) goto nla_put_failure; @@ -1535,12 +1567,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 8: - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) + rdev->wiphy.ap_sme_capa)) goto nla_put_failure; - features = dev->wiphy.features; + features = rdev->wiphy.features; /* * We can only add the per-channel limit information if the * dump is split, otherwise it makes it too big. Therefore @@ -1551,16 +1583,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && + if (rdev->wiphy.ht_capa_mod_mask && nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) + sizeof(*rdev->wiphy.ht_capa_mod_mask), + rdev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; - if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && - dev->wiphy.max_acl_mac_addrs && + if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + rdev->wiphy.max_acl_mac_addrs && nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, - dev->wiphy.max_acl_mac_addrs)) + rdev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; /* @@ -1576,41 +1608,41 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, state->split_start++; break; case 9: - if (dev->wiphy.extended_capabilities && + if (rdev->wiphy.extended_capabilities && (nla_put(msg, NL80211_ATTR_EXT_CAPA, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities) || + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities_mask))) + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities_mask))) goto nla_put_failure; - if (dev->wiphy.vht_capa_mod_mask && + if (rdev->wiphy.vht_capa_mod_mask && nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, - sizeof(*dev->wiphy.vht_capa_mod_mask), - dev->wiphy.vht_capa_mod_mask)) + sizeof(*rdev->wiphy.vht_capa_mod_mask), + rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; state->split_start++; break; case 10: - if (nl80211_send_coalesce(msg, dev)) + if (nl80211_send_coalesce(msg, rdev)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; - if (dev->wiphy.max_ap_assoc_sta && + if (rdev->wiphy.max_ap_assoc_sta && nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA, - dev->wiphy.max_ap_assoc_sta)) + rdev->wiphy.max_ap_assoc_sta)) goto nla_put_failure; state->split_start++; break; case 11: - if (dev->wiphy.n_vendor_commands) { + if (rdev->wiphy.n_vendor_commands) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1618,15 +1650,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_commands; i++) { - info = &dev->wiphy.vendor_commands[i].info; + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + info = &rdev->wiphy.vendor_commands[i].info; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } - if (dev->wiphy.n_vendor_events) { + if (rdev->wiphy.n_vendor_events) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1635,18 +1667,26 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_events; i++) { - info = &dev->wiphy.vendor_events[i]; + for (i = 0; i < rdev->wiphy.n_vendor_events; i++) { + info = &rdev->wiphy.vendor_events[i]; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } + state->split_start++; + break; + case 12: + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH && + nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, + rdev->wiphy.max_num_csa_counters)) + goto nla_put_failure; /* done */ state->split_start = 0; break; } + finish: return genlmsg_end(msg, hdr); nla_put_failure: @@ -1679,7 +1719,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, if (!netdev) return -ENODEV; if (netdev->ieee80211_ptr) { - rdev = wiphy_to_dev( + rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } @@ -1692,7 +1732,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; rtnl_lock(); if (!state) { @@ -1711,17 +1751,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = (long)state; } - list_for_each_entry(dev, &cfg80211_rdev_list, list) { - if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wiphy != -1 && - state->filter_wiphy != dev->wiphy_idx) + state->filter_wiphy != rdev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { - ret = nl80211_send_wiphy(dev, skb, + ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, + skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, state); @@ -1740,9 +1781,10 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) * We can then retry with the larger buffer. */ if ((ret == -ENOBUFS || ret == -EMSGSIZE) && - !skb->len && + !skb->len && !state->split && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; + state->split_start = 0; rtnl_unlock(); return 1; } @@ -1768,14 +1810,15 @@ static int nl80211_dump_wiphy_done(struct netlink_callback *cb) static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg, + info->snd_portid, info->snd_seq, 0, &state) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -1902,18 +1945,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, + struct net_device *dev, struct genl_info *info) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + struct wireless_dev *wdev = NULL; - if (wdev) - iftype = wdev->iftype; - + if (dev) + wdev = dev->ieee80211_ptr; if (!nl80211_can_set_dev_channel(wdev)) return -EOPNOTSUPP; + if (wdev) + iftype = wdev->iftype; result = nl80211_parse_chandef(rdev, info, &chandef); if (result) @@ -1922,14 +1967,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { - result = -EBUSY; - break; - } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { result = -EINVAL; break; } + if (wdev->beacon_interval) { + if (!dev || !rdev->ops->set_ap_chanwidth || + !(rdev->wiphy.features & + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { + result = -EBUSY; + break; + } + + /* Only allow dynamic channel width changes */ + if (chandef.chan != wdev->preset_chandef.chan) { + result = -EBUSY; + break; + } + result = rdev_set_ap_chanwidth(rdev, dev, &chandef); + if (result) + break; + } wdev->preset_chandef = chandef; result = 0; break; @@ -1951,7 +2009,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); + return __nl80211_set_channel(rdev, netdev, info); } static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) @@ -2007,7 +2065,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) - rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy); else netdev = NULL; } @@ -2073,9 +2131,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - result = __nl80211_set_channel(rdev, - nl80211_can_set_dev_channel(wdev) ? wdev : NULL, - info); + result = __nl80211_set_channel( + rdev, + nl80211_can_set_dev_channel(wdev) ? netdev : NULL, + info); if (result) return result; } @@ -2223,7 +2282,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) static inline u64 wdev_id(struct wireless_dev *wdev) { return (u64)wdev->identifier | - ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); + ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32); } static int nl80211_send_chandef(struct sk_buff *msg, @@ -2349,7 +2408,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -2357,7 +2416,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, - dev, wdev) < 0) { + rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -2508,6 +2567,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; + /* to avoid failing a new interface creation due to pending removal */ + cfg80211_destroy_ifaces(rdev); + memset(¶ms, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) @@ -2557,6 +2619,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } + if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER]) + wdev->owner_nlportid = info->snd_portid; + switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) @@ -3136,7 +3201,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; - u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -3252,24 +3316,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, ¶ms)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - params.chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { params.acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params.acl)) @@ -3326,7 +3376,7 @@ static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - return cfg80211_stop_ap(rdev, dev); + return cfg80211_stop_ap(rdev, dev, false); } static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { @@ -3607,6 +3657,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && + nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + sinfo->expected_throughput)) + goto nla_put_failure; if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) @@ -3669,13 +3723,13 @@ static int nl80211_dump_station(struct sk_buff *skb, struct netlink_callback *cb) { struct station_info sinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; int sta_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; @@ -3684,14 +3738,14 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_station) { + if (!rdev->ops->dump_station) { err = -EOPNOTSUPP; goto out_err; } while (1) { memset(&sinfo, 0, sizeof(sinfo)); - err = rdev_dump_station(dev, wdev->netdev, sta_idx, + err = rdev_dump_station(rdev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -3701,7 +3755,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, wdev->netdev, mac_addr, + rdev, wdev->netdev, mac_addr, &sinfo) < 0) goto out; @@ -3713,7 +3767,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -4374,18 +4428,18 @@ static int nl80211_dump_mpath(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; int path_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; - if (!dev->ops->dump_mpath) { + if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; goto out_err; } @@ -4396,7 +4450,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, } while (1) { - err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst, + err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst, next_hop, &pinfo); if (err == -ENOENT) break; @@ -4417,7 +4471,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -4612,6 +4666,7 @@ static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, + [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 }, }; static int parse_reg_rule(struct nlattr *tb[], @@ -4626,6 +4681,8 @@ static int parse_reg_rule(struct nlattr *tb[], return -EINVAL; if (!tb[NL80211_ATTR_FREQ_RANGE_END]) return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) + return -EINVAL; if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) return -EINVAL; @@ -4635,9 +4692,8 @@ static int parse_reg_rule(struct nlattr *tb[], nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); freq_range->end_freq_khz = nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); - if (tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) - freq_range->max_bandwidth_khz = - nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); + freq_range->max_bandwidth_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); power_rule->max_eirp = nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); @@ -4646,12 +4702,15 @@ static int parse_reg_rule(struct nlattr *tb[], power_rule->max_antenna_gain = nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); + if (tb[NL80211_ATTR_DFS_CAC_TIME]) + reg_rule->dfs_cac_ms = + nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]); + return 0; } static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { - int r; char *data = NULL; enum nl80211_user_reg_hint_type user_reg_hint_type; @@ -4664,11 +4723,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) - return -EINVAL; - - data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) user_reg_hint_type = nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); @@ -4678,14 +4732,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: case NL80211_USER_REG_HINT_CELL_BASE: - break; + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + return regulatory_hint_user(data, user_reg_hint_type); + case NL80211_USER_REG_HINT_INDOOR: + return regulatory_hint_indoor_user(); default: return -EINVAL; } - - r = regulatory_hint_user(data, user_reg_hint_type); - - return r; } static int nl80211_get_mesh_config(struct sk_buff *skb, @@ -5133,7 +5189,9 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, power_rule->max_antenna_gain) || nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, - power_rule->max_eirp)) + power_rule->max_eirp) || + nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, + reg_rule->dfs_cac_ms)) goto nla_put_failure_rcu; nla_nest_end(msg, nl_reg_rule); @@ -5274,7 +5332,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) { + if (rdev->scan_req || rdev->scan_msg) { err = -EBUSY; goto unlock; } @@ -5709,8 +5767,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->min_rssi_thold = NL80211_SCAN_RSSI_THOLD_OFF; } - if (info->attrs[NL80211_ATTR_IE]) { - request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + if (ie_len) { + request->ie_len = ie_len; memcpy((void *)request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), request->ie_len); @@ -5765,6 +5823,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; + unsigned int cac_time_ms; int err; dfs_region = reg_get_dfs_region(wdev->wiphy); @@ -5781,7 +5840,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (wdev->cac_started) return -EBUSY; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, + wdev->iftype); if (err < 0) return err; @@ -5794,17 +5854,17 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chandef.chan, CHAN_MODE_SHARED, - BIT(chandef.width)); - if (err) - return err; + cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); + if (WARN_ON(!cac_time_ms)) + cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; - err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef, + cac_time_ms); if (!err) { wdev->chandef = chandef; wdev->cac_started = true; wdev->cac_start_time = jiffies; + wdev->cac_time_ms = cac_time_ms; } return err; } @@ -5822,6 +5882,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) u8 radar_detect_width = 0; int err; bool need_new_beacon = false; + int len, i; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5880,26 +5941,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]) return -EINVAL; - params.counter_offset_beacon = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); - if (params.counter_offset_beacon >= params.beacon_csa.tail_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + if (!len || (len % sizeof(u16))) return -EINVAL; - /* sanity check - counters should be the same */ - if (params.beacon_csa.tail[params.counter_offset_beacon] != - params.count) + params.n_counter_offsets_beacon = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + params.counter_offsets_beacon = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_beacon; i++) { + u16 offset = params.counter_offsets_beacon[i]; + + if (offset >= params.beacon_csa.tail_len) + return -EINVAL; + + if (params.beacon_csa.tail[offset] != params.count) + return -EINVAL; + } + if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) { - params.counter_offset_presp = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); - if (params.counter_offset_presp >= - params.beacon_csa.probe_resp_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + if (!len || (len % sizeof(u16))) return -EINVAL; - if (params.beacon_csa.probe_resp[params.counter_offset_presp] != - params.count) + params.n_counter_offsets_presp = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + + params.counter_offsets_presp = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_presp; i++) { + u16 offset = params.counter_offsets_presp[i]; + + if (offset >= params.beacon_csa.probe_resp_len) + return -EINVAL; + + if (params.beacon_csa.probe_resp[offset] != + params.count) + return -EINVAL; + } } skip_beacons: @@ -5907,22 +5997,25 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; - if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || - dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO || - dev->ieee80211_ptr->iftype == NL80211_IFTYPE_ADHOC) { - err = cfg80211_chandef_dfs_required(wdev->wiphy, - ¶ms.chandef); - if (err < 0) { - return err; - } else if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } + err = cfg80211_chandef_dfs_required(wdev->wiphy, + ¶ms.chandef, + wdev->iftype); + if (err < 0) + return err; + + if (err > 0) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; } + /* TODO: I left this here for now. With channel switch, the + * verification is a bit more complicated, because we only do + * it later when the channel switch really happens. + */ err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, @@ -6149,12 +6242,12 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; - res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; @@ -6163,7 +6256,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_survey) { + if (!rdev->ops->dump_survey) { res = -EOPNOTSUPP; goto out_err; } @@ -6171,7 +6264,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, while (1) { struct ieee80211_channel *chan; - res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey); + res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); if (res == -ENOENT) break; if (res) @@ -6183,7 +6276,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out; } - chan = ieee80211_get_channel(&dev->wiphy, + chan = ieee80211_get_channel(&rdev->wiphy, survey.channel->center_freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; @@ -6202,7 +6295,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return res; } @@ -6678,7 +6771,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef, + NL80211_IFTYPE_ADHOC)) return -EINVAL; switch (ibss.chandef.width) { @@ -6853,7 +6947,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, int vendor_event_idx, int approxlen, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct nl80211_vendor_cmd_info *info; switch (cmd) { @@ -7268,6 +7362,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 action_code, dialog_token; + u32 peer_capability = 0; u16 status_code; u8 *peer; @@ -7286,9 +7381,12 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); + if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) + peer_capability = + nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); return rdev_tdls_mgmt(rdev, dev, peer, action_code, - dialog_token, status_code, + dialog_token, status_code, peer_capability, nla_data(info->attrs[NL80211_ATTR_IE]), nla_len(info->attrs[NL80211_ATTR_IE])); } @@ -7737,6 +7835,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + + if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { + int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + int i; + + if (len % sizeof(u16)) + return -EINVAL; + + params.n_csa_offsets = len / sizeof(u16); + params.csa_offsets = + nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + + /* check that all the offsets fit the frame */ + for (i = 0; i < params.n_csa_offsets; i++) { + if (params.csa_offsets[i] >= params.len) + return -EINVAL; + } + } + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -7750,8 +7869,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); - params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); params.chan = chandef.chan; err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) @@ -8448,6 +8565,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); err = -EINVAL; @@ -8471,19 +8590,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; - new_triggers.patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_triggers.patterns[i].mask) { + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) { err = -ENOMEM; goto error; } - new_triggers.patterns[i].pattern = - new_triggers.patterns[i].mask + mask_len; - memcpy(new_triggers.patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), + new_triggers.patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + mask_pat += mask_len; + new_triggers.patterns[i].pattern = mask_pat; new_triggers.patterns[i].pattern_len = pat_len; - memcpy(new_triggers.patterns[i].pattern, + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; @@ -8675,6 +8793,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || @@ -8696,17 +8816,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; - new_rule->patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_rule->patterns[i].mask) + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) return -ENOMEM; - new_rule->patterns[i].pattern = - new_rule->patterns[i].mask + mask_len; - memcpy(new_rule->patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + + new_rule->patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), + mask_len); + + mask_pat += mask_len; + new_rule->patterns[i].pattern = mask_pat; new_rule->patterns[i].pattern_len = pat_len; - memcpy(new_rule->patterns[i].pattern, - nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), + pat_len); i++; } @@ -8951,9 +9073,8 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - err = cfg80211_can_add_interface(rdev, wdev->iftype); - if (err) - return err; + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; err = rdev_start_p2p_device(rdev, wdev); if (err) @@ -9162,7 +9283,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_attrs attr, int approxlen) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (WARN_ON(!rdev->cur_cmd_info)) return NULL; @@ -9286,7 +9407,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, } dev = wdev->netdev; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { @@ -9987,16 +10108,20 @@ static const struct genl_ops nl80211_ops[] = { /* notification functions */ -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd) { struct sk_buff *msg; struct nl80211_dump_wiphy_state state = {}; + WARN_ON(cmd != NL80211_CMD_NEW_WIPHY && + cmd != NL80211_CMD_DEL_WIPHY); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { + if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } @@ -10116,40 +10241,31 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, bool aborted) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) - return; + return NULL; if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, - NL80211_CMD_NEW_SCAN_RESULTS) < 0) { + aborted ? NL80211_CMD_SCAN_ABORTED : + NL80211_CMD_NEW_SCAN_RESULTS) < 0) { nlmsg_free(msg); - return; + return NULL; } - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, - NL80211_MCGRP_SCAN, GFP_KERNEL); + return msg; } -void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void nl80211_send_scan_result(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) { - struct sk_buff *msg; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, - NL80211_CMD_SCAN_ABORTED) < 0) { - nlmsg_free(msg); - return; - } - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } @@ -10324,7 +10440,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_mgmt *mgmt = (void *)buf; u32 cmd; @@ -10546,7 +10662,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, const u8* ie, u8 ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -10726,7 +10842,7 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, unsigned int duration, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, @@ -10740,7 +10856,7 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, @@ -10752,7 +10868,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; trace_cfg80211_new_sta(dev, mac_addr, sinfo); @@ -10775,7 +10891,7 @@ EXPORT_SYMBOL(cfg80211_new_sta); void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10812,7 +10928,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10847,7 +10963,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); @@ -10967,7 +11083,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -11011,7 +11127,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11103,7 +11219,7 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_gtk_rekey_notify(dev, bssid); nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); @@ -11161,7 +11277,7 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); @@ -11208,7 +11324,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_WDEV_LOCK(wdev); @@ -11232,7 +11348,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11332,7 +11448,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11379,7 +11495,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11419,7 +11535,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; struct cfg80211_beacon_registration *reg; @@ -11466,7 +11582,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; int size = 200; @@ -11576,7 +11692,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, u16 reason_code, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11628,9 +11744,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) + bool schedule_destroy_work = false; + + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (wdev->owner_nlportid == notify->portid) + schedule_destroy_work = true; + } + spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { @@ -11641,11 +11763,24 @@ static int nl80211_netlink_notify(struct notifier_block * nb, } } spin_unlock_bh(&rdev->beacon_registrations_lock); + + if (schedule_destroy_work) { + struct cfg80211_iface_destroy *destroy; + + destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC); + if (destroy) { + destroy->nlportid = notify->portid; + spin_lock(&rdev->destroy_list_lock); + list_add(&destroy->list, &rdev->destroy_list); + spin_unlock(&rdev->destroy_list_lock); + schedule_work(&rdev->destroy_work); + } + } } rcu_read_unlock(); - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block nl80211_netlink_notifier = { @@ -11656,7 +11791,7 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -11703,7 +11838,7 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) void *hdr; u32 nlportid; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (!rdev->crit_proto_nlportid) return; @@ -11738,7 +11873,7 @@ EXPORT_SYMBOL(cfg80211_crit_proto_stopped); void nl80211_send_ap_stopped(struct wireless_dev *wdev) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index cb0216e1a00..49c9a482dd1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -5,13 +5,14 @@ int nl80211_init(void); void nl80211_exit(void); -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); -void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); -void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); +struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, bool aborted); +void nl80211_send_scan_result(struct cfg80211_registered_device *rdev, + struct sk_buff *msg); void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index c8e225947ad..d95bbe34813 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev, } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_info *sinfo) { int ret; @@ -769,13 +769,16 @@ static inline int rdev_set_rekey_data(struct cfg80211_registered_device *rdev, static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, const u8 *buf, size_t len) + u16 status_code, u32 peer_capability, + const u8 *buf, size_t len) { int ret; trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code, - dialog_token, status_code, buf, len); + dialog_token, status_code, peer_capability, + buf, len); ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code, - dialog_token, status_code, buf, len); + dialog_token, status_code, peer_capability, + buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -947,4 +950,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) +{ + int ret; + + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 27c5253e7a6..558b0e3a02d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -65,11 +65,26 @@ #define REG_DBG_PRINT(args...) #endif +/** + * enum reg_request_treatment - regulatory request treatment + * + * @REG_REQ_OK: continue processing the regulatory request + * @REG_REQ_IGNORE: ignore the regulatory request + * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should + * be intersected with the current one. + * @REG_REQ_ALREADY_SET: the regulatory request will not change the current + * regulatory settings, and no further processing is required. + * @REG_REQ_USER_HINT_HANDLED: a non alpha2 user hint was handled and no + * further processing is required, i.e., not need to update last_request + * etc. This should be used for user hints that do not provide an alpha2 + * but some other type of regulatory hint, i.e., indoor operation. + */ enum reg_request_treatment { REG_REQ_OK, REG_REQ_IGNORE, REG_REQ_INTERSECT, REG_REQ_ALREADY_SET, + REG_REQ_USER_HINT_HANDLED, }; static struct regulatory_request core_request_world = { @@ -91,10 +106,6 @@ static struct regulatory_request __rcu *last_request = /* To trigger userspace events */ static struct platform_device *reg_pdev; -static const struct device_type reg_device_type = { - .uevent = reg_device_uevent, -}; - /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no @@ -110,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; */ static int reg_num_devs_support_basehint; +/* + * State variable indicating if the platform on which the devices + * are attached is operating in an indoor environment. The state variable + * is relevant for all registered devices. + * (protected by RTNL) + */ +static bool reg_is_indoor; + static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { return rtnl_dereference(cfg80211_regdomain); @@ -244,11 +263,15 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reg_kfree_last_request(void) +static void reg_free_request(struct regulatory_request *request) { - struct regulatory_request *lr; + if (request != get_last_request()) + kfree(request); +} - lr = get_last_request(); +static void reg_free_last_request(void) +{ + struct regulatory_request *lr = get_last_request(); if (lr != &core_request_world && lr) kfree_rcu(lr, rcu_head); @@ -256,7 +279,13 @@ static void reg_kfree_last_request(void) static void reg_update_last_request(struct regulatory_request *request) { - reg_kfree_last_request(); + struct regulatory_request *lr; + + lr = get_last_request(); + if (lr == request) + return; + + reg_free_last_request(); rcu_assign_pointer(last_request, request); } @@ -487,11 +516,16 @@ static inline void reg_regdb_query(const char *alpha2) {} /* * This lets us keep regulatory code which is updated on a regulatory - * basis in userspace. Country information is filled in by - * reg_device_uevent + * basis in userspace. */ static int call_crda(const char *alpha2) { + char country[12]; + char *env[] = { country, NULL }; + + snprintf(country, sizeof(country), "COUNTRY=%c%c", + alpha2[0], alpha2[1]); + if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); @@ -501,7 +535,7 @@ static int call_crda(const char *alpha2) /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); - return kobject_uevent(®_pdev->dev.kobj, KOBJ_CHANGE); + return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); } static enum reg_request_treatment @@ -563,9 +597,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, if (freq_range_tmp->end_freq_khz < freq_range->start_freq_khz) break; - if (freq_range_tmp->max_bandwidth_khz) - break; - freq_range = freq_range_tmp; } @@ -582,9 +613,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, if (freq_range_tmp->start_freq_khz > freq_range->end_freq_khz) break; - if (freq_range_tmp->max_bandwidth_khz) - break; - freq_range = freq_range_tmp; } @@ -729,21 +757,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, max_bandwidth1 = freq_range1->max_bandwidth_khz; max_bandwidth2 = freq_range2->max_bandwidth_khz; - /* - * In case max_bandwidth1 == 0 and max_bandwith2 == 0 set - * output bandwidth as 0 (auto calculation). Next we will - * calculate this correctly in handle_channel function. - * In other case calculate output bandwidth here. - */ - if (max_bandwidth1 || max_bandwidth2) { - if (!max_bandwidth1) - max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1); - if (!max_bandwidth2) - max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2); - } + if (rule1->flags & NL80211_RRF_AUTO_BW) + max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1); + if (rule2->flags & NL80211_RRF_AUTO_BW) + max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2); freq_range->max_bandwidth_khz = min(max_bandwidth1, max_bandwidth2); + intersected_rule->flags = rule1->flags | rule2->flags; + + /* + * In case NL80211_RRF_AUTO_BW requested for both rules + * set AUTO_BW in intersected rule also. Next we will + * calculate BW correctly in handle_channel function. + * In other case remove AUTO_BW flag while we calculate + * maximum bandwidth correctly and auto calculation is + * not required. + */ + if ((rule1->flags & NL80211_RRF_AUTO_BW) && + (rule2->flags & NL80211_RRF_AUTO_BW)) + intersected_rule->flags |= NL80211_RRF_AUTO_BW; + else + intersected_rule->flags &= ~NL80211_RRF_AUTO_BW; + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; if (freq_range->max_bandwidth_khz > freq_diff) freq_range->max_bandwidth_khz = freq_diff; @@ -753,7 +789,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, power_rule2->max_antenna_gain); - intersected_rule->flags = rule1->flags | rule2->flags; + intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, + rule2->dfs_cac_ms); if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; @@ -867,6 +904,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) channel_flags |= IEEE80211_CHAN_NO_OFDM; + if (rd_flags & NL80211_RRF_NO_OUTDOOR) + channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; return channel_flags; } @@ -896,7 +935,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5)); if (band_rule_found && bw_fits) return rr; @@ -938,41 +977,52 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator) EXPORT_SYMBOL(reg_initiator_name); #ifdef CONFIG_CFG80211_REG_DEBUG -static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, +static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, + struct ieee80211_channel *chan, const struct ieee80211_reg_rule *reg_rule) { const struct ieee80211_power_rule *power_rule; const struct ieee80211_freq_range *freq_range; - char max_antenna_gain[32]; + char max_antenna_gain[32], bw[32]; power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; if (!power_rule->max_antenna_gain) - snprintf(max_antenna_gain, 32, "N/A"); + snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A"); + else + snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d", + power_rule->max_antenna_gain); + + if (reg_rule->flags & NL80211_RRF_AUTO_BW) + snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + freq_range->max_bandwidth_khz, + reg_get_max_bandwidth(regd, reg_rule)); else - snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); + snprintf(bw, sizeof(bw), "%d KHz", + freq_range->max_bandwidth_khz); REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", chan->center_freq); - REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", + REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, - freq_range->max_bandwidth_khz, max_antenna_gain, + bw, max_antenna_gain, power_rule->max_eirp); } #else -static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, +static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, + struct ieee80211_channel *chan, const struct ieee80211_reg_rule *reg_rule) { return; } #endif -/* - * Note that right now we assume the desired channel bandwidth - * is always 20 MHz for each individual channel (HT40 uses 20 MHz - * per channel, the primary and the extension channel). +/* Find an ieee80211_reg_rule such that a 5MHz channel with frequency + * chan->center_freq fits there. + * If there is no such reg_rule, disable the channel, otherwise set the + * flags corresponding to the bandwidths allowed in the particular reg_rule */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, @@ -1022,20 +1072,23 @@ static void handle_channel(struct wiphy *wiphy, return; } - chan_reg_rule_print_dbg(chan, reg_rule); + regd = reg_get_regdomain(wiphy); + chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; max_bandwidth_khz = freq_range->max_bandwidth_khz; /* Check if auto calculation requested */ - if (!max_bandwidth_khz) { - regd = reg_get_regdomain(wiphy); + if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - } + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags = IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags = IEEE80211_CHAN_NO_HT40; + bw_flags |= IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) @@ -1055,6 +1108,13 @@ static void handle_channel(struct wiphy *wiphy, (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = chan->orig_mpwr = (int) MBM_TO_DBM(power_rule->max_eirp); + + if (chan->flags & IEEE80211_CHAN_RADAR) { + chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + if (reg_rule->dfs_cac_ms) + chan->dfs_cac_ms = reg_rule->dfs_cac_ms; + } + return; } @@ -1067,6 +1127,14 @@ static void handle_channel(struct wiphy *wiphy, min_t(int, chan->orig_mag, MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); + + if (chan->flags & IEEE80211_CHAN_RADAR) { + if (reg_rule->dfs_cac_ms) + chan->dfs_cac_ms = reg_rule->dfs_cac_ms; + else + chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + } + if (chan->orig_mpwr) { /* * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER @@ -1102,12 +1170,19 @@ static bool reg_request_cell_base(struct regulatory_request *request) return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } +static bool reg_request_indoor(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR; +} + bool reg_last_request_cell_base(void) { return reg_request_cell_base(get_last_request()); } -#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS +#ifdef CONFIG_CFG80211_REG_CELLULAR_HINTS /* Core specific check */ static enum reg_request_treatment reg_ignore_cell_hint(struct regulatory_request *pending_request) @@ -1437,18 +1512,22 @@ static void handle_channel_custom(struct wiphy *wiphy, return; } - chan_reg_rule_print_dbg(chan, reg_rule); + chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; max_bandwidth_khz = freq_range->max_bandwidth_khz; /* Check if auto calculation requested */ - if (!max_bandwidth_khz) + if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags = IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags = IEEE80211_CHAN_NO_HT40; + bw_flags |= IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) @@ -1544,6 +1623,11 @@ __reg_process_hint_user(struct regulatory_request *user_request) { struct regulatory_request *lr = get_last_request(); + if (reg_request_indoor(user_request)) { + reg_is_indoor = true; + return REG_REQ_USER_HINT_HANDLED; + } + if (reg_request_cell_base(user_request)) return reg_ignore_cell_hint(user_request); @@ -1591,8 +1675,9 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) { - kfree(user_request); + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) { + reg_free_request(user_request); return treatment; } @@ -1652,14 +1737,15 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - kfree(driver_request); + case REG_REQ_USER_HINT_HANDLED: + reg_free_request(driver_request); return treatment; case REG_REQ_INTERSECT: /* fall through */ case REG_REQ_ALREADY_SET: regd = reg_copy_regd(get_cfg80211_regdom()); if (IS_ERR(regd)) { - kfree(driver_request); + reg_free_request(driver_request); return REG_REQ_IGNORE; } rcu_assign_pointer(wiphy->regd, regd); @@ -1751,12 +1837,13 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: + case REG_REQ_USER_HINT_HANDLED: /* fall through */ case REG_REQ_ALREADY_SET: - kfree(country_ie_request); + reg_free_request(country_ie_request); return treatment; case REG_REQ_INTERSECT: - kfree(country_ie_request); + reg_free_request(country_ie_request); /* * This doesn't happen yet, not sure we * ever want to support it for this case. @@ -1788,8 +1875,9 @@ static void reg_process_hint(struct regulatory_request *reg_request) return; case NL80211_REGDOM_SET_BY_USER: treatment = reg_process_hint_user(reg_request); - if (treatment == REG_REQ_OK || - treatment == REG_REQ_ALREADY_SET) + if (treatment == REG_REQ_IGNORE || + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) return; queue_delayed_work(system_power_efficient_wq, ®_timeout, msecs_to_jiffies(3142)); @@ -1817,7 +1905,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) return; out_free: - kfree(reg_request); + reg_free_request(reg_request); } /* @@ -1833,7 +1921,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); + reg_process_hint(lr); return; } @@ -1943,6 +2031,22 @@ int regulatory_hint_user(const char *alpha2, return 0; } +int regulatory_hint_indoor_user(void) +{ + struct regulatory_request *request; + + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR; + queue_regulatory_request(request); + + return 0; +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2110,6 +2214,8 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); + reg_is_indoor = false; + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -2245,39 +2351,49 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_freq_range *freq_range = NULL; const struct ieee80211_power_rule *power_rule = NULL; - char bw[32]; + char bw[32], cac_time[32]; - pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n"); + pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n"); for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; - if (!freq_range->max_bandwidth_khz) - snprintf(bw, 32, "%d KHz, AUTO", + if (reg_rule->flags & NL80211_RRF_AUTO_BW) + snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + freq_range->max_bandwidth_khz, reg_get_max_bandwidth(rd, reg_rule)); else - snprintf(bw, 32, "%d KHz", + snprintf(bw, sizeof(bw), "%d KHz", freq_range->max_bandwidth_khz); + if (reg_rule->flags & NL80211_RRF_DFS) + scnprintf(cac_time, sizeof(cac_time), "%u s", + reg_rule->dfs_cac_ms/1000); + else + scnprintf(cac_time, sizeof(cac_time), "N/A"); + + /* * There may not be documentation for max antenna gain * in certain regions */ if (power_rule->max_antenna_gain) - pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, bw, power_rule->max_antenna_gain, - power_rule->max_eirp); + power_rule->max_eirp, + cac_time); else - pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, bw, - power_rule->max_eirp); + power_rule->max_eirp, + cac_time); } } @@ -2350,9 +2466,6 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd, { const struct ieee80211_regdomain *intersected_rd = NULL; - if (is_world_regdom(rd->alpha2)) - return -EINVAL; - if (!regdom_changes(rd->alpha2)) return -EALREADY; @@ -2481,6 +2594,7 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, int set_regdom(const struct ieee80211_regdomain *rd) { struct regulatory_request *lr; + bool user_reset = false; int r; if (!reg_is_valid_request(rd->alpha2)) { @@ -2497,6 +2611,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) break; case NL80211_REGDOM_SET_BY_USER: r = reg_set_rd_user(rd, lr); + user_reset = true; break; case NL80211_REGDOM_SET_BY_DRIVER: r = reg_set_rd_driver(rd, lr); @@ -2510,8 +2625,14 @@ int set_regdom(const struct ieee80211_regdomain *rd) } if (r) { - if (r == -EALREADY) + switch (r) { + case -EALREADY: reg_set_request_processed(); + break; + default: + /* Back to world regulatory in case of errors */ + restore_regulatory_settings(user_reset); + } kfree(rd); return r; @@ -2533,26 +2654,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } -int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - struct regulatory_request *lr; - u8 alpha2[2]; - bool add = false; - - rcu_read_lock(); - lr = get_last_request(); - if (lr && !lr->processed) { - memcpy(alpha2, lr->alpha2, 2); - add = true; - } - rcu_read_unlock(); - - if (add) - return add_uevent_var(env, "COUNTRY=%c%c", - alpha2[0], alpha2[1]); - return 0; -} - void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; @@ -2575,7 +2676,7 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) reg_num_devs_support_basehint--; rcu_free_regdom(get_wiphy_regdom(wiphy)); - rcu_assign_pointer(wiphy->regd, NULL); + RCU_INIT_POINTER(wiphy->regd, NULL); if (lr) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); @@ -2595,6 +2696,40 @@ static void reg_timeout_work(struct work_struct *work) rtnl_unlock(); } +/* + * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for + * UNII band definitions + */ +int cfg80211_get_unii(int freq) +{ + /* UNII-1 */ + if (freq >= 5150 && freq <= 5250) + return 0; + + /* UNII-2A */ + if (freq > 5250 && freq <= 5350) + return 1; + + /* UNII-2B */ + if (freq > 5350 && freq <= 5470) + return 2; + + /* UNII-2C */ + if (freq > 5470 && freq <= 5725) + return 3; + + /* UNII-3 */ + if (freq > 5725 && freq <= 5825) + return 4; + + return -EINVAL; +} + +bool regulatory_indoor_allowed(void) +{ + return reg_is_indoor; +} + int __init regulatory_init(void) { int err = 0; @@ -2603,8 +2738,6 @@ int __init regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); - reg_pdev->dev.type = ®_device_type; - spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 18524617ab6..5e48031ccb9 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -25,8 +25,8 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); +int regulatory_hint_indoor_user(void); -int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); @@ -105,4 +105,21 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, */ void regulatory_hint_disconnect(void); +/** + * cfg80211_get_unii - get the U-NII band for the frequency + * @freq: the frequency for which we want to get the UNII band. + + * Get a value specifying the U-NII band frequency belongs to. + * U-NII bands are defined by the FCC in C.F.R 47 part 15. + * + * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A, + * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3. + */ +int cfg80211_get_unii(int freq); + +/** + * regulatory_indoor_allowed - is indoor operation allowed + */ +bool regulatory_indoor_allowed(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b528e31da2c..0798c62e608 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -81,10 +81,10 @@ static void bss_free(struct cfg80211_internal_bss *bss) kfree(bss); } -static inline void bss_ref_get(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); bss->refcount++; if (bss->pub.hidden_beacon_bss) { @@ -95,10 +95,10 @@ static inline void bss_ref_get(struct cfg80211_registered_device *dev, } } -static inline void bss_ref_put(struct cfg80211_registered_device *dev, +static inline void bss_ref_put(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (bss->pub.hidden_beacon_bss) { struct cfg80211_internal_bss *hbss; @@ -114,10 +114,10 @@ static inline void bss_ref_put(struct cfg80211_registered_device *dev, bss_free(bss); } -static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (!list_empty(&bss->hidden_list)) { /* @@ -134,45 +134,52 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, } list_del_init(&bss->list); - rb_erase(&bss->rbn, &dev->bss_tree); - bss_ref_put(dev, bss); + rb_erase(&bss->rbn, &rdev->bss_tree); + bss_ref_put(rdev, bss); return true; } -static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, +static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); - list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + list_for_each_entry_safe(bss, tmp, &rdev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - if (__cfg80211_unlink_bss(dev, bss)) + if (__cfg80211_unlink_bss(rdev, bss)) expired = true; } if (expired) - dev->bss_generation++; + rdev->bss_generation++; } -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, + bool send_message) { struct cfg80211_scan_request *request; struct wireless_dev *wdev; + struct sk_buff *msg; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif ASSERT_RTNL(); - request = rdev->scan_req; + if (rdev->scan_msg) { + nl80211_send_scan_result(rdev, rdev->scan_msg); + rdev->scan_msg = NULL; + return; + } + request = rdev->scan_req; if (!request) return; @@ -186,18 +193,16 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) if (wdev->netdev) cfg80211_sme_scan_done(wdev->netdev); - if (request->aborted) { - nl80211_send_scan_aborted(rdev, wdev); - } else { - if (request->flags & NL80211_SCAN_FLAG_FLUSH) { - /* flush entries from previous scans */ - spin_lock_bh(&rdev->bss_lock); - __cfg80211_bss_expire(rdev, request->scan_start); - spin_unlock_bh(&rdev->bss_lock); - } - nl80211_send_scan_done(rdev, wdev); + if (!request->aborted && + request->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, request->scan_start); + spin_unlock_bh(&rdev->bss_lock); } + msg = nl80211_build_scan_msg(rdev, wdev, request->aborted); + #ifdef CONFIG_CFG80211_WEXT if (wdev->netdev && !request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); @@ -211,6 +216,11 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) rdev->scan_req = NULL; kfree(request); + + if (!send_message) + rdev->scan_msg = msg; + else + nl80211_send_scan_result(rdev, msg); } void __cfg80211_scan_done(struct work_struct *wk) @@ -221,18 +231,18 @@ void __cfg80211_scan_done(struct work_struct *wk) scan_done_wk); rtnl_lock(); - ___cfg80211_scan_done(rdev); + ___cfg80211_scan_done(rdev, true); rtnl_unlock(); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) { trace_cfg80211_scan_done(request, aborted); - WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req); request->aborted = aborted; request->notified = true; - queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); + queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -268,20 +278,28 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_dev(wiphy)->sched_scan_req) + if (wiphy_to_rdev(wiphy)->sched_scan_req) queue_work(cfg80211_wq, - &wiphy_to_dev(wiphy)->sched_scan_results_wk); + &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + ASSERT_RTNL(); trace_cfg80211_sched_scan_stopped(wiphy); - rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + rtnl_lock(); + cfg80211_sched_scan_stopped_rtnl(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -312,21 +330,21 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); - spin_lock_bh(&dev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) + spin_lock_bh(&rdev->bss_lock); + list_for_each_entry(bss, &rdev->bss_list, list) bss->ts -= age_jiffies; - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } -void cfg80211_bss_expire(struct cfg80211_registered_device *dev) +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) { - __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); + __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) @@ -516,32 +534,34 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, const u8 *ssid, size_t ssid_len, u16 capa_mask, u16 capa_val) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; unsigned long now = jiffies; trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask, capa_val); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if ((bss->pub.capability & capa_mask) != capa_val) continue; if (channel && bss->pub.channel != channel) continue; + if (!is_valid_ether_addr(bss->pub.bssid)) + continue; /* Don't get expired BSS structs */ if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && !atomic_read(&bss->hold)) continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - bss_ref_get(dev, res); + bss_ref_get(rdev, res); break; } } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); if (!res) return NULL; trace_cfg80211_return_bss(&res->pub); @@ -549,10 +569,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -static void rb_insert_bss(struct cfg80211_registered_device *dev, +static void rb_insert_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - struct rb_node **p = &dev->bss_tree.rb_node; + struct rb_node **p = &rdev->bss_tree.rb_node; struct rb_node *parent = NULL; struct cfg80211_internal_bss *tbss; int cmp; @@ -575,15 +595,15 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, } rb_link_node(&bss->rbn, parent, p); - rb_insert_color(&bss->rbn, &dev->bss_tree); + rb_insert_color(&bss->rbn, &rdev->bss_tree); } static struct cfg80211_internal_bss * -rb_find_bss(struct cfg80211_registered_device *dev, +rb_find_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *res, enum bss_compare_mode mode) { - struct rb_node *n = dev->bss_tree.rb_node; + struct rb_node *n = rdev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; int r; @@ -602,7 +622,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *new) { const struct cfg80211_bss_ies *ies; @@ -632,7 +652,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* This is the bad part ... */ - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -649,9 +669,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, continue; if (ssidlen && ie[1] != ssidlen) continue; - /* that would be odd ... */ - if (bss->pub.beacon_ies) - continue; if (WARN_ON_ONCE(bss->pub.hidden_beacon_bss)) continue; if (WARN_ON_ONCE(!list_empty(&bss->hidden_list))) @@ -669,8 +686,9 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * -cfg80211_bss_update(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *tmp) +cfg80211_bss_update(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *tmp, + bool signal_valid) { struct cfg80211_internal_bss *found = NULL; @@ -679,14 +697,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, tmp->ts = jiffies; - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } - found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); + found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR); if (found) { /* Update IEs */ @@ -755,7 +773,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, } found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.signal = tmp->pub.signal; + /* + * don't update the signal if beacon was heard on + * adjacent channel. + */ + if (signal_valid) + found->pub.signal = tmp->pub.signal; found->pub.capability = tmp->pub.capability; found->ts = tmp->ts; } else { @@ -768,7 +791,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * is allocated on the stack since it's not needed in the * more common case of an update */ - new = kzalloc(sizeof(*new) + dev->wiphy.bss_priv_size, + new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size, GFP_ATOMIC); if (!new) { ies = (void *)rcu_dereference(tmp->pub.beacon_ies); @@ -784,9 +807,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, INIT_LIST_HEAD(&new->hidden_list); if (rcu_access_pointer(tmp->pub.proberesp_ies)) { - hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); if (!hidden) - hidden = rb_find_bss(dev, tmp, + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_NUL); if (hidden) { new->pub.hidden_beacon_bss = &hidden->pub; @@ -803,24 +826,24 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * expensive search for any probe responses that should * be grouped with this beacon for updates ... */ - if (!cfg80211_combine_bsses(dev, new)) { + if (!cfg80211_combine_bsses(rdev, new)) { kfree(new); goto drop; } } - list_add_tail(&new->list, &dev->bss_list); - rb_insert_bss(dev, new); + list_add_tail(&new->list, &rdev->bss_list); + rb_insert_bss(rdev, new); found = new; } - dev->bss_generation++; - bss_ref_get(dev, found); - spin_unlock_bh(&dev->bss_lock); + rdev->bss_generation++; + bss_ref_get(rdev, found); + spin_unlock_bh(&rdev->bss_lock); return found; drop: - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } @@ -859,14 +882,16 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, /* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss* cfg80211_inform_bss_width(struct wiphy *wiphy, - struct ieee80211_channel *channel, + struct ieee80211_channel *rx_channel, enum nl80211_bss_scan_width scan_width, const u8 *bssid, u64 tsf, u16 capability, u16 beacon_interval, const u8 *ie, size_t ielen, s32 signal, gfp_t gfp) { struct cfg80211_bss_ies *ies; + struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; + bool signal_valid; if (WARN_ON(!wiphy)) return NULL; @@ -875,7 +900,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, (signal < 0 || signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, channel); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, rx_channel); if (!channel) return NULL; @@ -903,7 +928,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, rcu_assign_pointer(tmp.pub.beacon_ies, ies); rcu_assign_pointer(tmp.pub.ies, ies); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -919,20 +946,22 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width); /* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss * cfg80211_inform_bss_width_frame(struct wiphy *wiphy, - struct ieee80211_channel *channel, + struct ieee80211_channel *rx_channel, enum nl80211_bss_scan_width scan_width, struct ieee80211_mgmt *mgmt, size_t len, s32 signal, gfp_t gfp) { struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; + struct ieee80211_channel *channel; + bool signal_valid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); - trace_cfg80211_inform_bss_width_frame(wiphy, channel, scan_width, mgmt, + trace_cfg80211_inform_bss_width_frame(wiphy, rx_channel, scan_width, mgmt, len, signal); if (WARN_ON(!mgmt)) @@ -949,7 +978,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, channel); + ielen, rx_channel); if (!channel) return NULL; @@ -973,7 +1002,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -988,7 +1019,7 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width_frame); void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -996,15 +1027,15 @@ void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_get(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_get(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -1012,15 +1043,15 @@ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_put(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_put(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (WARN_ON(!pub)) @@ -1028,12 +1059,12 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (!list_empty(&bss->list)) { - if (__cfg80211_unlink_bss(dev, bss)) - dev->bss_generation++; + if (__cfg80211_unlink_bss(rdev, bss)) + rdev->bss_generation++; } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_unlink_bss); @@ -1050,7 +1081,7 @@ cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) if (!dev) return ERR_PTR(-ENODEV); if (dev->ieee80211_ptr) - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(dev->ieee80211_ptr->wiphy); else rdev = ERR_PTR(-ENODEV); dev_put(dev); @@ -1079,7 +1110,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) { + if (rdev->scan_req || rdev->scan_msg) { err = -EBUSY; goto out; } @@ -1130,7 +1161,11 @@ int cfg80211_wext_siwscan(struct net_device *dev, int k; int wiphy_freq = wiphy->bands[band]->channels[j].center_freq; for (k = 0; k < wreq->num_channels; k++) { - int wext_freq = cfg80211_wext_freq(wiphy, &wreq->channel_list[k]); + struct iw_freq *freq = + &wreq->channel_list[k]; + int wext_freq = + cfg80211_wext_freq(freq); + if (wext_freq == wiphy_freq) goto wext_freq_found; } @@ -1442,7 +1477,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, } -static int ieee80211_scan_results(struct cfg80211_registered_device *dev, +static int ieee80211_scan_results(struct cfg80211_registered_device *rdev, struct iw_request_info *info, char *buf, size_t len) { @@ -1450,18 +1485,18 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev, char *end_buf = buf + len; struct cfg80211_internal_bss *bss; - spin_lock_bh(&dev->bss_lock); - cfg80211_bss_expire(dev); + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_expire(rdev); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return -E2BIG; } - current_ev = ieee80211_bss(&dev->wiphy, info, bss, + current_ev = ieee80211_bss(&rdev->wiphy, info, bss, current_ev, end_buf); } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return current_ev - buf; } @@ -1481,7 +1516,7 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) + if (rdev->scan_req || rdev->scan_msg) return -EAGAIN; res = ieee80211_scan_results(rdev, info, extra, data->length); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a6350911850..8bbeeb30221 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -59,15 +59,14 @@ static void cfg80211_sme_free(struct wireless_dev *wdev) static int cfg80211_conn_scan(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_scan_request *request; int n_channels, err; ASSERT_RTNL(); - ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); - if (rdev->scan_req) + if (rdev->scan_req || rdev->scan_msg) return -EBUSY; if (wdev->conn->params.channel) @@ -131,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) static int cfg80211_conn_do_work(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_connect_params *params; struct cfg80211_assoc_request req = {}; int err; @@ -150,7 +149,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: - BUG_ON(!rdev->ops->auth); + if (WARN_ON(!rdev->ops->auth)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_AUTHENTICATING; return cfg80211_mlme_auth(rdev, wdev->netdev, params->channel, params->auth_type, @@ -162,7 +162,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_AUTH_FAILED: return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: - BUG_ON(!rdev->ops->assoc); + if (WARN_ON(!rdev->ops->assoc)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) req.prev_bssid = wdev->conn->prev_bssid; @@ -235,7 +236,6 @@ void cfg80211_conn_work(struct work_struct *work) NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - cfg80211_sme_free(wdev); } wdev_unlock(wdev); } @@ -246,7 +246,7 @@ void cfg80211_conn_work(struct work_struct *work) /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; u16 capa = WLAN_CAPABILITY_ESS; @@ -276,7 +276,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) static void __cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; ASSERT_WDEV_LOCK(wdev); @@ -307,7 +307,7 @@ void cfg80211_sme_scan_done(struct net_device *dev) void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); @@ -353,7 +353,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return false; @@ -387,7 +387,7 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -398,7 +398,7 @@ void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) void cfg80211_sme_disassoc(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -409,7 +409,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -422,7 +422,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, struct cfg80211_connect_params *connect, const u8 *prev_bssid) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; int err; @@ -469,7 +469,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, } wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; + wdev->conn->params.ssid_len = wdev->ssid_len; /* see if we have the bss already */ bss = cfg80211_get_conn_bss(wdev); @@ -481,7 +481,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, /* we're good if we have a matching bss struct */ if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); cfg80211_put_bss(wdev->wiphy, bss); } else { @@ -507,7 +506,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (!wdev->conn) @@ -595,7 +594,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, + nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, GFP_KERNEL); @@ -626,7 +625,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, #endif if (!bss && (status == WLAN_STATUS_SUCCESS)) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, @@ -649,6 +648,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); } + cfg80211_sme_free(wdev); return; } @@ -688,7 +688,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, u16 status, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -743,7 +743,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid, + nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), + wdev->netdev, bss->bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); @@ -802,7 +803,7 @@ void cfg80211_roamed_bss(struct net_device *dev, size_t resp_ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -835,7 +836,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; @@ -878,10 +879,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, } void cfg80211_disconnected(struct net_device *dev, u16 reason, - u8 *ie, size_t ie_len, gfp_t gfp) + const u8 *ie, size_t ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5eaeed59db0..560ed77084e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1468,9 +1468,10 @@ TRACE_EVENT(rdev_sched_scan_start, TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, const u8 *buf, size_t len), + u16 status_code, u32 peer_capability, + const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code, - buf, len), + peer_capability, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY @@ -1478,6 +1479,7 @@ TRACE_EVENT(rdev_tdls_mgmt, __field(u8, action_code) __field(u8, dialog_token) __field(u16, status_code) + __field(u32, peer_capability) __dynamic_array(u8, buf, len) ), TP_fast_assign( @@ -1487,13 +1489,15 @@ TRACE_EVENT(rdev_tdls_mgmt, __entry->action_code = action_code; __entry->dialog_token = dialog_token; __entry->status_code = status_code; + __entry->peer_capability = peer_capability; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, " - "dialog_token: %u, status_code: %u, buf: %#.2x ", + "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->action_code, __entry->dialog_token, - __entry->status_code, ((u8 *)__get_dynamic_array(buf))[0]) + __entry->status_code, __entry->peer_capability, + ((u8 *)__get_dynamic_array(buf))[0]) ); TRACE_EVENT(rdev_dump_survey, @@ -1872,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch, WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY - __field(u16, counter_offset_beacon) - __field(u16, counter_offset_presp) __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) + __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) + __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(¶ms->chandef); - __entry->counter_offset_beacon = params->counter_offset_beacon; - __entry->counter_offset_presp = params->counter_offset_presp; __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; + memcpy(__get_dynamic_array(bcn_ofs), + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + + /* probe response offsets are optional */ + if (params->n_counter_offsets_presp) + memcpy(__get_dynamic_array(pres_ofs), + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT - ", block_tx: %d, count: %u, radar_required: %d" - ", counter offsets (beacon/presp): %u/%u", + ", block_tx: %d, count: %u, radar_required: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, - __entry->block_tx, __entry->count, __entry->radar_required, - __entry->counter_offset_beacon, - __entry->counter_offset_presp) + __entry->block_tx, __entry->count, __entry->radar_required) ); TRACE_EVENT(rdev_set_qos_map, @@ -1915,6 +1923,24 @@ TRACE_EVENT(rdev_set_qos_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); +TRACE_EVENT(rdev_set_ap_chanwidth, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, netdev, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2189,18 +2215,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, ); TRACE_EVENT(cfg80211_reg_can_beacon, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype), + TP_ARGS(wiphy, chandef, iftype), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY + __field(enum nl80211_iftype, iftype) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->iftype = iftype; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) ); TRACE_EVENT(cfg80211_chandef_dfs_required, @@ -2611,6 +2640,21 @@ TRACE_EVENT(cfg80211_ft_event, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) ); +TRACE_EVENT(cfg80211_stop_iface, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 780b4546c9c..728f1c0dc70 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -11,6 +11,7 @@ #include <net/ip.h> #include <net/dsfield.h> #include <linux/if_vlan.h> +#include <linux/mpls.h> #include "core.h" #include "rdev-ops.h" @@ -475,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, EXPORT_SYMBOL(ieee80211_data_to_8023); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, u8 *bssid, bool qos) + enum nl80211_iftype iftype, + const u8 *bssid, bool qos) { struct ieee80211_hdr hdr; u16 hdrlen, ethertype; @@ -717,6 +719,21 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, case htons(ETH_P_IPV6): dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & 0xfc; break; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): { + struct mpls_label mpls_tmp, *mpls; + + mpls = skb_header_pointer(skb, sizeof(struct ethhdr), + sizeof(*mpls), &mpls_tmp); + if (!mpls) + return 0; + + return (ntohl(mpls->entry) & MPLS_LS_TC_MASK) + >> MPLS_LS_TC_SHIFT; + } + case htons(ETH_P_80221): + /* 802.21 is always network control traffic */ + return 7; default: return 0; } @@ -754,7 +771,7 @@ EXPORT_SYMBOL(ieee80211_bss_get_ie); void cfg80211_upload_connect_keys(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct net_device *dev = wdev->netdev; int i; @@ -823,6 +840,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid, ev->ij.channel); break; + case EVENT_STOPPED: + __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + break; } wdev_unlock(wdev); @@ -838,7 +858,6 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) struct wireless_dev *wdev; ASSERT_RTNL(); - ASSERT_RDEV_LOCK(rdev); list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); @@ -851,7 +870,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, int err; enum nl80211_iftype otype = dev->ieee80211_ptr->iftype; - ASSERT_RDEV_LOCK(rdev); + ASSERT_RTNL(); /* don't support changing VLANs, you just re-create them */ if (otype == NL80211_IFTYPE_AP_VLAN) @@ -873,11 +892,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, - ntype); - if (err) - return err; - dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); @@ -886,7 +900,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: - cfg80211_stop_ap(rdev, dev); + cfg80211_stop_ap(rdev, dev, true); break; case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, false); @@ -1253,6 +1267,120 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } +int cfg80211_iter_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES], + void (*iter)(const struct ieee80211_iface_combination *c, + void *data), + void *data) +{ + const struct ieee80211_regdomain *regdom; + enum nl80211_dfs_regions region = 0; + int i, j, iftype; + int num_interfaces = 0; + u32 used_iftypes = 0; + + if (radar_detect) { + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + if (regdom) + region = regdom->dfs_region; + rcu_read_unlock(); + } + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + num_interfaces += iftype_num[iftype]; + if (iftype_num[iftype] > 0 && + !(wiphy->software_iftypes & BIT(iftype))) + used_iftypes |= BIT(iftype); + } + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + if (num_interfaces > c->max_interfaces) + continue; + if (num_different_channels > c->num_different_channels) + continue; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (wiphy->software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; + if (!(limits[j].types & BIT(iftype))) + continue; + if (limits[j].max < iftype_num[iftype]) + goto cont; + limits[j].max -= iftype_num[iftype]; + } + } + + if (radar_detect != (c->radar_detect_widths & radar_detect)) + goto cont; + + if (radar_detect && c->radar_detect_regions && + !(c->radar_detect_regions & BIT(region))) + goto cont; + + /* Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* This combination covered all interface types and + * supported the requested numbers, so we're good. + */ + + (*iter)(c, data); + cont: + kfree(limits); + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_iter_combinations); + +static void +cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, + void *data) +{ + int *num = data; + (*num)++; +} + +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]) +{ + int err, num = 0; + + err = cfg80211_iter_combinations(wiphy, num_different_channels, + radar_detect, iftype_num, + cfg80211_iter_sum_ifcombs, &num); + if (err) + return err; + if (num == 0) + return -EBUSY; + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_combinations); + int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, @@ -1261,7 +1389,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, u8 radar_detect) { struct wireless_dev *wdev_iter; - u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; struct ieee80211_channel *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; @@ -1269,43 +1396,14 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; - bool radar_required = false; - int i, j; + int i; ASSERT_RTNL(); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; - switch (iftype) { - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MESH_POINT: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_WDS: - /* if the interface could potentially choose a DFS channel, - * then mark DFS as required. - */ - if (!chan) { - if (chanmode != CHAN_MODE_UNDEFINED && radar_detect) - radar_required = true; - break; - } - radar_required = !!(chan->flags & IEEE80211_CHAN_RADAR); - break; - case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_MONITOR: - break; - case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_UNSPECIFIED: - default: - return -EINVAL; - } - - if (radar_required && !radar_detect) + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return -EINVAL; /* Always allow software iftypes */ @@ -1320,6 +1418,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[iftype] = 1; + /* TODO: We'll probably not need this anymore, since this + * should only be called with CHAN_MODE_UNDEFINED. There are + * still a couple of pending calls where other chanmodes are + * used, but we should get rid of them. + */ switch (chanmode) { case CHAN_MODE_UNDEFINED: break; @@ -1383,65 +1486,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; - used_iftypes |= BIT(wdev_iter->iftype); } if (total == 1 && !radar_detect) return 0; - for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - struct ieee80211_iface_limit *limits; - u32 all_iftypes = 0; - - c = &rdev->wiphy.iface_combinations[i]; - - if (total > c->max_interfaces) - continue; - if (num_different_channels > c->num_different_channels) - continue; - - limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, - GFP_KERNEL); - if (!limits) - return -ENOMEM; - - for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (rdev->wiphy.software_iftypes & BIT(iftype)) - continue; - for (j = 0; j < c->n_limits; j++) { - all_iftypes |= limits[j].types; - if (!(limits[j].types & BIT(iftype))) - continue; - if (limits[j].max < num[iftype]) - goto cont; - limits[j].max -= num[iftype]; - } - } - - if (radar_detect && !(c->radar_detect_widths & radar_detect)) - goto cont; - - /* - * Finally check that all iftypes that we're currently - * using are actually part of this combination. If they - * aren't then we can't use this combination and have - * to continue to the next. - */ - if ((all_iftypes & used_iftypes) != used_iftypes) - goto cont; - - /* - * This combination covered all interface types and - * supported the requested numbers, so we're good. - */ - kfree(limits); - return 0; - cont: - kfree(limits); - } - - return -EBUSY; + return cfg80211_check_combinations(&rdev->wiphy, num_different_channels, + radar_detect, num); } int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, @@ -1495,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) } EXPORT_SYMBOL(ieee80211_get_num_supported_channels); +int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + wdev = dev->ieee80211_ptr; + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_rdev(wdev->wiphy); + if (!rdev->ops->get_station) + return -EOPNOTSUPP; + + return rdev_get_station(rdev, dev, mac_addr, sinfo); +} +EXPORT_SYMBOL(cfg80211_get_station); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5661a54ac7e..11120bb1416 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -73,7 +73,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct vif_params vifparams; enum nl80211_iftype type; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); switch (*mode) { case IW_MODE_INFRA: @@ -253,12 +253,12 @@ EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange); /** * cfg80211_wext_freq - get wext frequency for non-"auto" - * @wiphy: the wiphy + * @dev: the net device * @freq: the wext freq encoding * * Returns a frequency, or a negative error code, or 0 for auto. */ -int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq) +int cfg80211_wext_freq(struct iw_freq *freq) { /* * Parse frequency - return 0 for auto and @@ -286,7 +286,7 @@ int cfg80211_wext_siwrts(struct net_device *dev, struct iw_param *rts, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 orts = wdev->wiphy->rts_threshold; int err; @@ -324,7 +324,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_param *frag, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 ofrag = wdev->wiphy->frag_threshold; int err; @@ -364,7 +364,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_param *retry, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 changed = 0; u8 olong = wdev->wiphy->retry_long; u8 oshort = wdev->wiphy->retry_short; @@ -587,7 +587,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int idx, err; bool remove = false; struct key_params params; @@ -647,7 +647,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, struct iw_point *erq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; const u8 *addr; int idx; @@ -775,7 +775,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; @@ -787,7 +787,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) @@ -798,7 +798,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return -EINVAL; return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) @@ -818,7 +818,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef; int ret; @@ -847,7 +847,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; @@ -899,7 +899,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err, val; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) @@ -1119,7 +1119,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev, struct iw_param *wrq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); bool ps = wdev->ps; int timeout = wdev->ps_timeout; int err; @@ -1177,7 +1177,7 @@ static int cfg80211_wds_wext_siwap(struct net_device *dev, struct sockaddr *addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) @@ -1221,7 +1221,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev, struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; @@ -1272,7 +1272,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use a static struct */ static struct station_info sinfo; u8 addr[ETH_ALEN]; @@ -1310,7 +1310,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; static struct station_info sinfo; @@ -1449,7 +1449,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, struct iw_point *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 5d766b0118e..ebcacca2f73 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -50,7 +50,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_point *data, char *extra); -int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); +int cfg80211_wext_freq(struct iw_freq *freq); extern const struct iw_handler_def cfg80211_wext_handler; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 14c9a2583ba..c7e5c8eb4f2 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -21,7 +21,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, const u8 *prev_bssid = NULL; int err, i; - ASSERT_RDEV_LOCK(rdev); + ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); if (!netif_running(wdev->netdev)) @@ -67,7 +67,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; @@ -75,7 +75,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; @@ -169,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; @@ -260,7 +260,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; @@ -333,7 +333,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_point *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *ie = extra; int ie_len = data->length, err; @@ -390,7 +390,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (!wdev) return -EOPNOTSUPP; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6177479c7de..5ad4418ef09 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1064,7 +1064,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, x25_start_heartbeat(make); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); rc = 1; sock_put(sk); out: diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index d1b0dc79bb6..7ac50098a37 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -79,7 +79,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) skb_set_owner_r(skbn, sk); skb_queue_tail(&sk->sk_receive_queue, skbn); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skbn->len); + sk->sk_data_ready(sk); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 6c7ac016ce3..85d1d476461 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -16,6 +16,81 @@ static struct kmem_cache *secpath_cachep __read_mostly; +static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); +static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; + +int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else + rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + spin_unlock_bh(&xfrm_input_afinfo_lock); + return err; +} +EXPORT_SYMBOL(xfrm_input_register_afinfo); + +int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +{ + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); + if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + } + spin_unlock_bh(&xfrm_input_afinfo_lock); + synchronize_rcu(); + return err; +} +EXPORT_SYMBOL(xfrm_input_unregister_afinfo); + +static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +{ + struct xfrm_input_afinfo *afinfo; + + if (unlikely(family >= NPROTO)) + return NULL; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_input_afinfo[family]); + if (unlikely(!afinfo)) + rcu_read_unlock(); + return afinfo; +} + +static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) +{ + rcu_read_unlock(); +} + +static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, + int err) +{ + int ret; + struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + + if (!afinfo) + return -EAFNOSUPPORT; + + ret = afinfo->callback(skb, protocol, err); + xfrm_input_put_afinfo(afinfo); + + return ret; +} + void __secpath_destroy(struct sec_path *sp) { int i; @@ -108,7 +183,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) int err; __be32 seq; __be32 seq_hi; - struct xfrm_state *x; + struct xfrm_state *x = NULL; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; unsigned int family; @@ -120,9 +195,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) async = 1; x = xfrm_input_state(skb); seq = XFRM_SKB_CB(skb)->seq.input.low; + family = x->outer_mode->afinfo->family; goto resume; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); + family = XFRM_SPI_SKB_CB(skb)->family; + /* Allocate new secpath or COW existing one. */ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { struct sec_path *sp; @@ -137,10 +217,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp = sp; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); - family = XFRM_SPI_SKB_CB(skb)->family; - seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -162,6 +238,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -201,7 +282,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (nexthdr == -EINPROGRESS) return 0; - resume: spin_lock(&x->lock); if (nexthdr <= 0) { @@ -263,6 +343,10 @@ resume: } } while (!err); + err = xfrm_rcv_cb(skb, family, x->type->proto, 0); + if (err) + goto drop; + nf_reset(skb); if (decaps) { @@ -276,6 +360,7 @@ resume: drop_unlock: spin_unlock(&x->lock); drop: + xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); kfree_skb(skb); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4b98b25793c..c08fbd11cef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,8 +39,6 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 -static struct dst_entry *xfrm_policy_sk_bundles; - static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; - atomic_inc(&flow_cache_genid); + atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1158,7 +1156,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, if (hlist_unhashed(&pol->bydst)) return NULL; - hlist_del(&pol->bydst); + hlist_del_init(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); net->xfrm.policy_count[dir]--; @@ -1844,7 +1842,7 @@ purge_queue: xfrm_pol_put(pol); } -static int xdst_queue_output(struct sk_buff *skb) +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); @@ -2109,13 +2107,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - dst_hold(&xdst->u.dst); - - spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - xdst->u.dst.next = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = &xdst->u.dst; - spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - route = xdst->route; } } @@ -2549,33 +2540,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void __xfrm_garbage_collect(struct net *net) -{ - struct dst_entry *head, *next; - - spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - head = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = NULL; - spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); - - while (head) { - next = head->next; - dst_free(head); - head = next; - } -} - void xfrm_garbage_collect(struct net *net) { - flow_cache_flush(); - __xfrm_garbage_collect(net); + flow_cache_flush(net); } EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { - flow_cache_flush_deferred(); - __xfrm_garbage_collect(net); + flow_cache_flush_deferred(net); } static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2940,15 +2913,19 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; + rv = flow_cache_init(net); + if (rv < 0) + goto out; /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); rwlock_init(&net->xfrm.xfrm_policy_lock); - spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; +out: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -2961,6 +2938,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26b7aa7947..8e9c781a6bb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); +bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static DEFINE_SPINLOCK(xfrm_type_lock); @@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + struct km_event c; to_put = NULL; @@ -832,6 +834,17 @@ found: error = -EEXIST; goto out; } + + c.net = net; + /* If the KMs have no listeners (yet...), avoid allocating an SA + * for each and every packet - garbage collection might not + * handle the flood. + */ + if (!km_is_alive(&c)) { + error = -ESRCH; + goto out; + } + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; @@ -1135,10 +1148,9 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE -static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) { struct net *net = xs_net(orig); - int err = -ENOMEM; struct xfrm_state *x = xfrm_state_alloc(net); if (!x) goto out; @@ -1159,6 +1171,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) } x->props.aalgo = orig->props.aalgo; + if (orig->aead) { + x->aead = xfrm_algo_aead_clone(orig->aead); + if (!x->aead) + goto error; + } if (orig->ealg) { x->ealg = xfrm_algo_clone(orig->ealg); if (!x->ealg) @@ -1187,20 +1204,21 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) } if (orig->replay_esn) { - err = xfrm_replay_clone(x, orig); - if (err) + if (xfrm_replay_clone(x, orig)) goto error; } memcpy(&x->mark, &orig->mark, sizeof(x->mark)); - err = xfrm_init_state(x); - if (err) + if (xfrm_init_state(x) < 0) goto error; x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; + x->tfcpad = orig->tfcpad; + x->replay_maxdiff = orig->replay_maxdiff; + x->replay_maxage = orig->replay_maxage; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; @@ -1210,16 +1228,15 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) error: xfrm_state_put(x); out: - if (errp) - *errp = err; return NULL; } -/* net->xfrm.xfrm_state_lock is held */ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) { unsigned int h; - struct xfrm_state *x; + struct xfrm_state *x = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, @@ -1236,7 +1253,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, @@ -1251,11 +1268,13 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } - return NULL; + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + return x; } EXPORT_SYMBOL(xfrm_migrate_state_find); @@ -1263,9 +1282,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m) { struct xfrm_state *xc; - int err; - xc = xfrm_state_clone(x, &err); + xc = xfrm_state_clone(x); if (!xc) return NULL; @@ -1278,7 +1296,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, state is to be updated as it is a part of triplet */ xfrm_state_insert(xc); } else { - if ((err = xfrm_state_add(xc)) < 0) + if (xfrm_state_add(xc) < 0) goto error; } @@ -1451,7 +1469,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - struct net *net = xs_net(*dst); + struct net *net = xs_net(*src); if (!afinfo) return -EAFNOSUPPORT; @@ -1590,6 +1608,23 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); +static bool __xfrm_state_filter_match(struct xfrm_state *x, + struct xfrm_address_filter *filter) +{ + if (filter) { + if ((filter->family == AF_INET || + filter->family == AF_INET6) && + x->props.family != filter->family) + return false; + + return addr_match(&x->props.saddr, &filter->saddr, + filter->splen) && + addr_match(&x->id.daddr, &filter->daddr, + filter->dplen); + } + return true; +} + int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) @@ -1612,6 +1647,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, state = container_of(x, struct xfrm_state, km); if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; + if (!__xfrm_state_filter_match(state, walk->filter)) + continue; err = func(state, walk->seq, data); if (err) { list_move_tail(&walk->all, &x->all); @@ -1630,17 +1667,21 @@ out: } EXPORT_SYMBOL(xfrm_state_walk); -void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, + struct xfrm_address_filter *filter) { INIT_LIST_HEAD(&walk->all); walk->proto = proto; walk->state = XFRM_STATE_DEAD; walk->seq = 0; + walk->filter = filter; } EXPORT_SYMBOL(xfrm_state_walk_init); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) { + kfree(walk->filter); + if (list_empty(&walk->all)) return; @@ -1793,6 +1834,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address } EXPORT_SYMBOL(km_report); +bool km_is_alive(const struct km_event *c) +{ + struct xfrm_mgr *km; + bool is_alive = false; + + rcu_read_lock(); + list_for_each_entry_rcu(km, &xfrm_km_list, list) { + if (km->is_alive && km->is_alive(c)) { + is_alive = true; + break; + } + } + rcu_read_unlock(); + + return is_alive; +} +EXPORT_SYMBOL(km_is_alive); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ae3ec7c18b..8f131c10a6f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -32,11 +32,6 @@ #include <linux/in6.h> #endif -static inline int aead_len(struct xfrm_algo_aead *alg) -{ - return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); -} - static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { struct nlattr *rt = attrs[type]; @@ -142,7 +137,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (!rt) return 0; - if (p->id.proto != IPPROTO_ESP) + /* As only ESP and AH support ESN feature. */ + if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) return -EINVAL; if (p->replay_window != 0) @@ -886,6 +882,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) return 0; } +static const struct nla_policy xfrma_policy[XFRMA_MAX+1]; static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -901,8 +898,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; if (!cb->args[0]) { + struct nlattr *attrs[XFRMA_MAX+1]; + struct xfrm_address_filter *filter = NULL; + u8 proto = 0; + int err; + cb->args[0] = 1; - xfrm_state_walk_init(walk, 0); + + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; + + if (attrs[XFRMA_ADDRESS_FILTER]) { + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return -ENOMEM; + + memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), + sizeof(*filter)); + } + + if (attrs[XFRMA_PROTO]) + proto = nla_get_u8(attrs[XFRMA_PROTO]); + + xfrm_state_walk_init(walk, proto, filter); } (void) xfrm_state_walk(net, walk, dump_one_state, &info); @@ -1226,7 +1246,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs return 0; uctx = nla_data(rt); - return security_xfrm_policy_alloc(&pol->security, uctx); + return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -1631,7 +1651,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } @@ -1933,7 +1953,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } @@ -2308,6 +2328,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, + [XFRMA_PROTO] = { .type = NLA_U8 }, + [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, }; static const struct xfrm_link { @@ -2981,6 +3003,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); } +static bool xfrm_is_alive(const struct km_event *c) +{ + return (bool)xfrm_acquire_is_on(c->net); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, @@ -2990,6 +3017,7 @@ static struct xfrm_mgr netlink_mgr = { .report = xfrm_send_report, .migrate = xfrm_send_migrate, .new_mapping = xfrm_send_mapping, + .is_alive = xfrm_is_alive, }; static int __net_init xfrm_user_net_init(struct net *net) |