diff options
Diffstat (limited to 'net')
95 files changed, 994 insertions, 814 deletions
diff --git a/net/802/tr.c b/net/802/tr.c index e7eb13084d7..e874447ad14 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -561,6 +561,9 @@ static int rif_seq_show(struct seq_file *seq, void *v) } seq_putc(seq, '\n'); } + + if (dev) + dev_put(dev); } return 0; } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2b7390e377b..d1e10546eb8 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; @@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs | IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 654e45f5719..c67fe6f7565 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -121,8 +121,10 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, if (!skb) return NET_RX_DROP; - if (netpoll_rx_on(skb)) + if (netpoll_rx_on(skb)) { + skb->protocol = eth_type_trans(skb, skb->dev); return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + } return napi_frags_finish(napi, skb, vlan_gro_common(napi, grp, vlan_tci, skb)); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1b34135cf99..b4b9068e55a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + netif_carrier_on(dev); return 0; clear_allmulti: @@ -471,6 +472,7 @@ del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); out: + netif_carrier_off(dev); return err; } @@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + netif_carrier_off(dev); return 0; } @@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; int subclass = 0; + netif_carrier_off(dev); + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; @@ -668,7 +673,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, const struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (!real_dev->ethtool_ops->get_settings) + if (!real_dev->ethtool_ops || + !real_dev->ethtool_ops->get_settings) return -EOPNOTSUPP; return real_dev->ethtool_ops->get_settings(real_dev, cmd); diff --git a/net/9p/client.c b/net/9p/client.c index 1eb580c38fb..dd43a8289b0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) p9pdu_reset(req->tc); p9pdu_reset(req->rc); - req->flush_tag = 0; req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; @@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - struct p9_req_t *other_req; - unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); - - if (req->status == REQ_STATUS_ERROR) - wake_up(req->wq); - - if (req->flush_tag) { /* flush receive path */ - P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - other_req = p9_tag_lookup(c, req->flush_tag); - if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ - spin_unlock_irqrestore(&c->lock, flags); - else { - other_req->status = REQ_STATUS_FLSHD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(other_req->wq); - } - p9_free_req(c, req); - } else { /* normal receive path */ - P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - if (req->status != REQ_STATUS_FLSHD) - req->status = REQ_STATUS_RCVD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); - } + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - req->flush_tag = oldtag; - /* we don't free anything here because RPC isn't complete */ + /* if we haven't received a response for oldreq, + remove it from the list. */ + spin_lock(&c->lock); + if (oldreq->status == REQ_STATUS_FLSH) + list_del(&oldreq->req_list); + spin_unlock(&c->lock); + + p9_free_req(c, req); return 0; } @@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) struct p9_req_t *req; unsigned long flags; int sigpending; - int flushed = 0; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) goto reterr; } - /* if it was a flush we just transmitted, return our tag */ - if (type == P9_TFLUSH) - return req; -again: P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", - req->wq, tag, err, flushed); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", + req->wq, tag, err); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; - } else if (err == -ERESTARTSYS && flushed) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); - goto again; - } else if (req->status == REQ_STATUS_FLSHD) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); - err = -ERESTARTSYS; } - if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); - spin_lock_irqsave(&c->lock, flags); - if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - spin_unlock_irqrestore(&c->lock, flags); sigpending = 1; - flushed = 1; clear_thread_flag(TIF_SIGPENDING); - if (c->trans_mod->cancel(c, req)) { - err = p9_client_flush(c, req); - if (err == 0) - goto again; - } + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; } if (sigpending) { @@ -1244,19 +1208,53 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); + p9_free_req(clnt, req); + return ret; + free_and_error: p9_free_req(clnt, req); error: - return ret; + kfree(ret); + return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_stat); +static int p9_client_statsize(struct p9_wstat *wst, int optional) +{ + int ret; + + /* size[2] type[2] dev[4] qid[13] */ + /* mode[4] atime[4] mtime[4] length[8]*/ + /* name[s] uid[s] gid[s] muid[s] */ + ret = 2+2+4+13+4+4+4+8+2+2+2+2; + + if (wst->name) + ret += strlen(wst->name); + if (wst->uid) + ret += strlen(wst->uid); + if (wst->gid) + ret += strlen(wst->gid); + if (wst->muid) + ret += strlen(wst->muid); + + if (optional) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ + if (wst->extension) + ret += strlen(wst->extension); + } + + return ret; +} + int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; struct p9_req_t *req; struct p9_client *clnt; + err = 0; + clnt = fid->clnt; + wst->size = p9_client_statsize(wst, clnt->dotu); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" @@ -1268,10 +1266,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - err = 0; - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c613ed08a5e..a2a1814c7a8 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); p9_client_cb(m->client, req); } } @@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req) { + if (!m->req || (m->req->status != REQ_STATUS_SENT && + m->req->status != REQ_STATUS_FLSH)) { P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work) if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); + if (m->req->status != REQ_STATUS_ERROR) + m->req->status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req); - m->rbuf = NULL; m->rpos = 0; m->rsize = 0; @@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; + P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); - list_del(&req->req_list); if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } + } else if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; spin_unlock(&client->lock); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 7fa0eb20b2f..ac4990041eb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, goto err_out; req->rc = c->rc; + req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); return; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2d7781ec663..bb8579a141a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); + req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } } diff --git a/net/Kconfig b/net/Kconfig index ce77db4fcec..c19f549c8e7 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -119,12 +119,6 @@ menuconfig NETFILTER <file:Documentation/Changes> under "iptables" for the location of these packages. - Make sure to say N to "Fast switching" below if you intend to say Y - here, as Fast switching currently bypasses netfilter. - - Chances are that you should say Y here if you compile a kernel which - will run as a router and N for regular hosts. If unsure, say N. - if NETFILTER config NETFILTER_DEBUG diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 334fcd4a4ea..3100a8940af 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); + brdev->net_dev = netdev; netdev->netdev_ops = &br2684_netdev_ops; diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 57aeba729ba..832bcf092a0 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -148,9 +148,13 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) { struct ax25_uid_assoc *pt; struct hlist_node *node; - int i = 0; + int i = 1; read_lock(&ax25_uid_lock); + + if (*pos == 0) + return SEQ_START_TOKEN; + ax25_uid_for_each(pt, node, &ax25_uid_list) { if (i == *pos) return pt; @@ -162,8 +166,10 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - - return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, + if (v == SEQ_START_TOKEN) + return ax25_uid_list.first; + else + return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, ax25_uid_assoc, uid_node); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1181db08d9d..fa47d5d84f5 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -171,10 +171,8 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: case BT_CONNECT2: - if (conn->type == ACL_LINK) + if (conn->type == ACL_LINK && conn->out) hci_acl_connect_cancel(conn); - else - hci_acl_disconn(conn, 0x13); break; case BT_CONFIG: case BT_CONNECTED: @@ -215,6 +213,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->power_save = 1; + conn->disc_timeout = HCI_DISCONN_TIMEOUT; switch (type) { case ACL_LINK: @@ -247,6 +246,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_init_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; @@ -289,6 +290,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + hci_dev_put(hdev); + return 0; } @@ -424,12 +427,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level == BT_SECURITY_SDP) return 1; - if (sec_level == BT_SECURITY_LOW) { - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level, auth_type); - else - return 1; - } + if (sec_level == BT_SECURITY_LOW && + (!conn->ssp_mode || !conn->hdev->ssp_mode)) + return 1; if (conn->link_mode & HCI_LM_ENCRYPT) return hci_conn_auth(conn, sec_level, auth_type); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 55534244c3a..184ba0a88ec 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -866,8 +866,16 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type != SCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -875,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1055,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_proto_connect_cfm(conn, ev->status); hci_conn_put(conn); } - } else + } else { hci_auth_cfm(conn, ev->status); + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; @@ -1471,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_pin_code_req *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn && conn->state == BT_CONNECTED) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1481,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_notify *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1646,20 +1688,28 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->type = SCO_LINK; } - if (conn->out && ev->status == 0x1c && conn->attempt < 2) { - conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | - (hdev->esco_type & EDR_ESCO_MASK); - hci_setup_sync(conn, conn->link->handle); - goto unlock; - } - - if (!ev->status) { + switch (ev->status) { + case 0x00: conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; hci_conn_add_sysfs(conn); - } else + break; + + case 0x1c: /* SCO interval rejected */ + case 0x1f: /* Unspecified error */ + if (conn->out && conn->attempt < 2) { + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); + hci_setup_sync(conn, conn->link->handle); + goto unlock; + } + /* fall through */ + + default: conn->state = BT_CLOSED; + break; + } hci_proto_connect_cfm(conn, ev->status); if (ev->status) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ed82796d4a0..95f7a7a544b 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,8 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *btaddconn; -static struct workqueue_struct *btdelconn; +static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) { @@ -88,35 +87,17 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_add); + struct hci_dev *hdev = conn->hdev; - flush_workqueue(btdelconn); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); return; } -} - -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->dev.type = &bt_link; - conn->dev.class = bt_class; - conn->dev.parent = &hdev->dev; - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - - dev_set_drvdata(&conn->dev, conn); - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work, add_conn); - - queue_work(btaddconn, &conn->work); + hci_dev_hold(hdev); } /* @@ -131,9 +112,12 @@ static int __match_tty(struct device *dev, void *data) static void del_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; + if (!device_is_registered(&conn->dev)) + return; + while (1) { struct device *dev; @@ -146,19 +130,40 @@ static void del_conn(struct work_struct *work) device_del(&conn->dev); put_device(&conn->dev); + hci_dev_put(hdev); } -void hci_conn_del_sysfs(struct hci_conn *conn) +void hci_conn_init_sysfs(struct hci_conn *conn) { + struct hci_dev *hdev = conn->hdev; + BT_DBG("conn %p", conn); - if (!device_is_registered(&conn->dev)) - return; + conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; - INIT_WORK(&conn->work, del_conn); + dev_set_drvdata(&conn->dev, conn); + + device_initialize(&conn->dev); - queue_work(btdelconn, &conn->work); + INIT_WORK(&conn->work_add, add_conn); + INIT_WORK(&conn->work_del, del_conn); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + queue_work(bt_workq, &conn->work_add); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + queue_work(bt_workq, &conn->work_del); } static inline char *host_typetostr(int type) @@ -435,20 +440,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) + bt_workq = create_singlethread_workqueue("bluetooth"); + if (!bt_workq) return -ENOMEM; - btdelconn = create_singlethread_workqueue("btdelconn"); - if (!btdelconn) { - destroy_workqueue(btaddconn); - return -ENOMEM; - } - bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(btdelconn); - destroy_workqueue(btaddconn); + destroy_workqueue(bt_workq); return PTR_ERR(bt_class); } @@ -457,8 +455,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); + destroy_workqueue(bt_workq); class_destroy(bt_class); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1d0fb0f23c6..374536e050a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1194,6 +1194,8 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) rfcomm_send_ua(d->session, d->dlci); + rfcomm_dlc_clear_timer(d); + rfcomm_dlc_lock(d); d->state = BT_CONNECTED; d->state_change(d, 0); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 30b88777c3d..5ee1a3682bf 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; + /* If STP is turned off, then forward */ + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ @@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) return skb; /* continue processing */ } +forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3953ac4214c..e4a418fcb35 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } +#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP) && + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); } +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) +{ + return br_dev_queue_push_xmit(skb); +} +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6e63ec3f1fc..0660515f399 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); + if (br->stp_enabled != BR_KERNEL_STP) + return; + pr_info("%s: topology change detected, %s\n", br->dev->name, isroot ? "propagating" : "sending tcn bpdu"); diff --git a/net/can/af_can.c b/net/can/af_can.c index 547bafc79e2..10f0528c3bf 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -674,8 +674,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); - /* free the skbuff allocated by the netdevice driver */ - kfree_skb(skb); + /* consume the skbuff allocated by the netdevice driver */ + consume_skb(skb); if (matches > 0) { can_stats.matches++; diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378..b01a76abe1d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); diff --git a/net/core/dev.c b/net/core/dev.c index 91d792d17e0..e2e9e4af3ac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; +#ifdef CONFIG_NET_CLS_ACT + if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) + net_timestamp(skb); +#else net_timestamp(skb); +#endif rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1430,7 +1435,7 @@ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); } } EXPORT_SYMBOL(netif_device_detach); @@ -1445,7 +1450,7 @@ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_wake_queue(dev); + netif_tx_wake_all_queues(dev); __netdev_watchdog_up(dev); } } @@ -1730,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - } else if (skb->sk && skb->sk->sk_hash) { + if (skb_rx_queue_recorded(skb)) + return skb_get_rx_queue(skb) % dev->real_num_tx_queues; + + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; - } else + else hash = skb->protocol; hash = jhash_1word(hash, skb_tx_hashrnd); @@ -2328,8 +2334,10 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (NAPI_GRO_CB(skb)->count == 1) + if (NAPI_GRO_CB(skb)->count == 1) { + skb_shinfo(skb)->gso_size = 0; goto out; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -2348,7 +2356,6 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - skb_shinfo(skb)->gso_size = 0; return netif_receive_skb(skb); } @@ -2539,9 +2546,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = &info->frags[info->nr_frags - 1]; + frag = info->frags; - for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + for (i = 0; i < info->nr_frags; i++) { skb_fill_page_desc(skb, i, frag->page, frag->page_offset, frag->size); frag++; @@ -4399,7 +4406,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; #ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatiability support. + /* Netdevice_ops API compatibility support. * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { @@ -4410,7 +4417,7 @@ int register_netdevice(struct net_device *dev) dev->name, netdev_drivername(dev, drivername, 64)); /* This works only because net_device_ops and the - compatiablity structure are the same. */ + compatibility structure are the same. */ dev->netdev_ops = (void *) &(dev->init); } #endif diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95b109..6d62d4618cf 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,9 +126,9 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bdff61..64f51eec657 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c1..0666a827bc6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f7..e505b5392e1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -502,7 +502,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->gso_segs = 0; shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; + shinfo->tx_flags.flags = 0; shinfo->frag_list = NULL; + memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->data = skb->head + NET_SKB_PAD; @@ -1365,9 +1367,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static inline struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb) + struct sk_buff *skb, struct sock *sk) { - struct sock *sk = skb->sk; struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1405,13 +1406,14 @@ new_page: */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear) + struct sk_buff *skb, int linear, + struct sock *sk) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, &offset, skb); + page = linear_to_page(page, len, &offset, skb, sk); if (!page) return 1; } else @@ -1442,7 +1444,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear) + struct splice_pipe_desc *spd, int linear, + struct sock *sk) { if (!*len) return 1; @@ -1465,7 +1468,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1481,8 +1484,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * pipe is full or if we already spliced the requested length. */ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) + unsigned int *len, struct splice_pipe_desc *spd, + struct sock *sk) { int seg; @@ -1492,7 +1495,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1)) + offset, len, skb, spd, 1, sk)) return 1; /* @@ -1502,7 +1505,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0)) + offset, len, skb, spd, 0, sk)) return 1; } @@ -1528,12 +1531,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; + struct sock *sk = skb->sk; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1545,14 +1549,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) + if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) break; } } done: if (spd.nr_pages) { - struct sock *sk = skb->sk; int ret; /* @@ -2285,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b2cf91e4cca..5b919f7b45d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET If unsure, say Y. config INET_LRO - tristate "Large Receive Offload (ipv4/tcp)" - + bool "Large Receive Offload (ipv4/tcp)" + default y ---help--- Support for Large Receive Offload (ipv4/tcp). diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b..33c7c85dfe4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a41..88bf051d0cb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ +u32 ic_dev_xid; /* Device under configuration */ + /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } + /* Is it a reply for the device we are configuring? */ + if (b->xid != ic_dev_xid) { + if (net_ratelimit()) + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + goto drop_unlock; + } + /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); for (;;) { + /* Track the device we are configuring */ + ic_dev_xid = d->xid; + #ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234d..831fe1879dc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d..2ec8d7290c4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f..3229e0a81ba 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->dst.u3.ip, 0); + range->flags & IP_NAT_RANGE_PERSISTENT ? + (__force u32)tuple->dst.u3.ip : 0, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b3..28205e5bfa9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,8 +784,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + struct rtable *rth, *aux, **rthp; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,8 +809,10 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -819,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1068,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1088,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1134,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1205,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1224,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; @@ -3397,7 +3377,7 @@ int __init ip_rt_init(void) 0, &rt_hash_log, &rt_hash_mask, - 0); + rhash_entries ? 0 : 512 * 1024); memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); rt_hash_lock_init(); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fafbec8b073..7a0f0b27bf1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; + u32 urg_hole = 0; lock_sock(sk); @@ -1532,7 +1533,8 @@ do_prequeue: } } } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if ((flags & MSG_PEEK) && + (peek_seq - copied - urg_hole != tp->copied_seq)) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); @@ -1553,6 +1555,7 @@ do_prequeue: if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; + urg_hole++; offset++; used--; if (!used) @@ -2511,6 +2514,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; + unsigned int len; unsigned int thlen; unsigned int flags; unsigned int mss = 1; @@ -2531,6 +2535,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, thlen); + len = skb_gro_len(skb); flags = tcp_flag_word(th); for (; (p = *head); head = &p->next) { @@ -2561,7 +2566,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (len > mss) | !len; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2574,7 +2579,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb_gro_len(skb) < mss; + flush = len < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bc8e27a163..eec3e6f9956 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } -static u32 tcp_rto_min(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; - - if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); - return rto_min; -} - /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge @@ -928,6 +918,8 @@ static void tcp_init_metrics(struct sock *sk) tcp_set_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; + +cwnd: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; return; @@ -942,6 +934,7 @@ reset: tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } + goto cwnd; } static void tcp_update_reordering(struct sock *sk, const int metric, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53300fa2359..59aec609cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd..c6743eec9b7 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} + static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_ssthresh > 2 ) { + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ - tp->snd_ssthresh = 2; /* Set cwnd to match the actual rate * exactly: @@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. */ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ @@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * we slow down. */ tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bda08a09357..7a1d1ce22e6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); -EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d31df0f4bc9..a7fdf9a27f1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -380,10 +380,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, default: goto sticky_done; } - - if ((rthdr->hdrlen & 1) || - (rthdr->hdrlen >> 1) != rthdr->segments_left) - goto sticky_done; } retv = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dfed176aed3..219e165aea1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,28 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); + get_counters(private, counters); - xt_free_table_info(info); - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); + return counters; } static int @@ -1332,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1403,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1463,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 14e6724d567..91490ad9302 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -50,14 +50,14 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) struct ipv6_opt_hdr _hdr; int hdrlen; - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return false; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } + /* Is there enough space for the next ext header? */ + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return false; /* ESP -> evaluate */ if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35..032a5ec391c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6842dd2edd5..8905712cfbb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,6 +53,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -60,7 +62,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return ipv4_rcv_saddr_equal(sk, sk2); + return (!sk2_ipv6only && + (!sk_rcv_saddr || !sk2_rcv_saddr || + sk_rcv_saddr == sk2_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 49e786535dc..b51c9187c34 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -172,6 +172,7 @@ static void iucv_sock_close(struct sock *sk) err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); } + case IUCV_CLOSING: /* fall through */ sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -224,6 +225,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->path = NULL; + memset(&iucv_sk(sk)->src_user_id , 0, 32); sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -811,6 +814,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + /* receive/dequeue next skb: + * the function understands MSG_PEEK and, thus, does not dequeue skb */ skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,9 +863,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, iucv_process_message_q(sk); spin_unlock_bh(&iucv->message_q.lock); } - - } else - skb_queue_head(&sk->sk_receive_queue, skb); + } done: return err ? : copied; @@ -934,6 +937,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how) lock_sock(sk); switch (sk->sk_state) { + case IUCV_DISCONN: + case IUCV_CLOSING: + case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -1113,8 +1119,12 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) struct sock_msg_q *save_msg; int len; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + iucv_message_reject(path, msg); return; + } + + spin_lock(&iucv->message_q.lock); if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) @@ -1129,9 +1139,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) if (!skb) goto save_message; - spin_lock(&iucv->message_q.lock); iucv_process_message(sk, skb, path, msg); - spin_unlock(&iucv->message_q.lock); + goto out_unlock; return; @@ -1142,8 +1151,9 @@ save_message: save_msg->path = path; save_msg->msg = *msg; - spin_lock(&iucv->message_q.lock); list_add_tail(&save_msg->list, &iucv->message_q.list); + +out_unlock: spin_unlock(&iucv->message_q.lock); } diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index f3d9ae350fb..ecc3faf9f11 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -202,10 +202,3 @@ config MAC80211_DEBUG_COUNTERS and show them in debugfs. If unsure, say N. - -config MAC80211_VERBOSE_SPECT_MGMT_DEBUG - bool "Verbose Spectrum Management (IEEE 802.11h)debugging" - depends on MAC80211_DEBUG_MENU - ---help--- - Say Y here to print out verbose Spectrum Management (IEEE 802.11h) - debug messages. diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a6f1d8a869b..14134193cd1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -258,7 +258,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) (chan->max_power - local->power_constr_level) : chan->max_power; - if (local->user_power_level) + if (local->user_power_level >= 0) power = min(power, local->user_power_level); if (local->hw.conf.power_level != power) { @@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.long_frame_max_tx_count = 4; local->hw.conf.short_frame_max_tx_count = 7; local->hw.conf.radio_enabled = true; + local->user_power_level = -1; INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); @@ -909,6 +910,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_sta_info; + result = ieee80211_wep_init(local); + if (result < 0) { + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); + goto fail_wep; + } + rtnl_lock(); result = dev_alloc_name(local->mdev, local->mdev->name); if (result < 0) @@ -930,14 +938,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_rate; } - result = ieee80211_wep_init(local); - - if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", - wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } - /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { result = ieee80211_if_add(local, "wlan%d", NULL, @@ -967,13 +967,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return 0; -fail_wep: - rate_control_deinitialize(local); fail_rate: unregister_netdevice(local->mdev); local->mdev = NULL; fail_dev: rtnl_unlock(); + ieee80211_wep_free(local); +fail_wep: sta_info_stop(local); fail_sta_info: debugfs_hw_del(local); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7ecda9d59d8..132938b073d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -441,6 +441,9 @@ static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) u8 index, indexn1, indexn2; struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; + if (unlikely(!tim || elems->tim_len < 4)) + return false; + aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); @@ -945,9 +948,13 @@ void ieee80211_beacon_loss_work(struct work_struct *work) u.mgd.beacon_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " - "- sending probe request\n", sdata->dev->name, - sdata->u.mgd.bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " + "- sending probe request\n", sdata->dev->name, + sdata->u.mgd.bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, @@ -1007,9 +1014,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) (local->hw.conf.flags & IEEE80211_CONF_PS)) && time_after(jiffies, ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { - printk(KERN_DEBUG "%s: beacon loss from AP %pM " - "- sending probe request\n", - sdata->dev->name, ifmgd->bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: beacon loss from AP %pM " + "- sending probe request\n", + sdata->dev->name, ifmgd->bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, ifmgd->ssid_len, NULL, 0); @@ -1355,7 +1366,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.supp_rates[i] & 0x80); + bool is_basic = !!(elems.ext_supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; @@ -1902,9 +1913,17 @@ static void ieee80211_sta_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * Need to update last_beacon to avoid beacon loss + * test to trigger. + */ + sdata->u.mgd.last_beacon = jiffies; + + queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + } } /* interface setup */ @@ -2105,12 +2124,13 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, dynamic_ps_enable_work); + /* XXX: using scan_sdata is completely broken! */ struct ieee80211_sub_if_data *sdata = local->scan_sdata; if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && sdata) ieee80211_send_nullfunc(local, sdata, 1); local->hw.conf.flags |= IEEE80211_CONF_PS; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 02730232649..81985d27cbd 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -156,8 +156,19 @@ int __ieee80211_resume(struct ieee80211_hw *hw) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - WARN_ON(ieee80211_if_config(sdata, changed)); - ieee80211_bss_info_change_notify(sdata, ~0); + /* + * Driver's config_interface can fail if rfkill is + * enabled. Accommodate this return code. + * FIXME: When mac80211 has knowledge of rfkill + * state the code below can change back to: + * WARN(ieee80211_if_config(sdata, changed)); + * ieee80211_bss_info_change_notify(sdata, ~0); + */ + if (ieee80211_if_config(sdata, changed)) + printk(KERN_DEBUG "%s: failed to configure interface during resume\n", + sdata->dev->name); + else + ieee80211_bss_info_change_notify(sdata, ~0); break; case NL80211_IFTYPE_WDS: break; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 3824990d340..d9233ec5061 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -476,8 +476,8 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) return NULL; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = hw->wiphy->bands[hw->conf.channel->band]; - if (sband->n_bitrates > max_rates) + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index b16801cde06..8bef9a1262f 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -317,13 +317,44 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { struct rc_pid_sta_info *spinfo = priv_sta; + struct rc_pid_info *pinfo = priv; + struct rc_pid_rateinfo *rinfo = pinfo->rinfo; struct sta_info *si; + int i, j, tmp; + bool s; /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ + /* Sort the rates. This is optimized for the most common case (i.e. + * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed + * mapping too. */ + for (i = 0; i < sband->n_bitrates; i++) { + rinfo[i].index = i; + rinfo[i].rev_index = i; + if (RC_PID_FAST_START) + rinfo[i].diff = 0; + else + rinfo[i].diff = i * pinfo->norm_offset; + } + for (i = 1; i < sband->n_bitrates; i++) { + s = 0; + for (j = 0; j < sband->n_bitrates - i; j++) + if (unlikely(sband->bitrates[rinfo[j].index].bitrate > + sband->bitrates[rinfo[j + 1].index].bitrate)) { + tmp = rinfo[j].index; + rinfo[j].index = rinfo[j + 1].index; + rinfo[j + 1].index = tmp; + rinfo[rinfo[j].index].rev_index = j; + rinfo[rinfo[j + 1].index].rev_index = j + 1; + s = 1; + } + if (!s) + break; + } + spinfo->txrate_idx = rate_lowest_index(sband, sta); /* HACK */ si = container_of(sta, struct sta_info, sta); @@ -336,21 +367,22 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; struct ieee80211_supported_band *sband; - int i, j, tmp; - bool s; + int i, max_rates = 0; #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de; #endif - sband = hw->wiphy->bands[hw->conf.channel->band]; - pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) return NULL; - /* We can safely assume that sband won't change unless we get - * reinitialized. */ - rinfo = kmalloc(sizeof(*rinfo) * sband->n_bitrates, GFP_ATOMIC); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC); if (!rinfo) { kfree(pinfo); return NULL; @@ -368,33 +400,6 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, pinfo->rinfo = rinfo; pinfo->oldrate = 0; - /* Sort the rates. This is optimized for the most common case (i.e. - * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed - * mapping too. */ - for (i = 0; i < sband->n_bitrates; i++) { - rinfo[i].index = i; - rinfo[i].rev_index = i; - if (RC_PID_FAST_START) - rinfo[i].diff = 0; - else - rinfo[i].diff = i * pinfo->norm_offset; - } - for (i = 1; i < sband->n_bitrates; i++) { - s = 0; - for (j = 0; j < sband->n_bitrates - i; j++) - if (unlikely(sband->bitrates[rinfo[j].index].bitrate > - sband->bitrates[rinfo[j + 1].index].bitrate)) { - tmp = rinfo[j].index; - rinfo[j].index = rinfo[j + 1].index; - rinfo[j + 1].index = tmp; - rinfo[rinfo[j].index].rev_index = j; - rinfo[rinfo[j + 1].index].rev_index = j + 1; - s = 1; - } - if (!s) - break; - } - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64ebe664eff..9776f73c51a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -29,6 +29,7 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *status, u16 mpdu_seq_num, int bar_req); /* @@ -1396,7 +1397,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * mac80211. That also explains the __skb_push() * below. */ - align = (unsigned long)skb->data & 4; + align = (unsigned long)skb->data & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); @@ -1688,7 +1689,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) /* manage reordering buffer according to requested */ /* sequence number */ rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, + ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL, start_seq_num, 1); rcu_read_unlock(); return RX_DROP_UNUSABLE; @@ -2293,6 +2294,7 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *rxstatus, u16 mpdu_seq_num, int bar_req) { @@ -2374,6 +2376,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, /* put the frame in the reordering buffer */ tid_agg_rx->reorder_buf[index] = skb; + memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus, + sizeof(*rxstatus)); tid_agg_rx->stored_mpdu_num++; /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) @@ -2399,7 +2403,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, } static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct ieee80211_rx_status *status) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -2448,7 +2453,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* according to mpdu sequence number deal with reordering buffer */ mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, + ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status, mpdu_seq_num, 0); end_reorder: return ret; @@ -2512,7 +2517,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - if (!ieee80211_rx_reorder_ampdu(local, skb)) + if (!ieee80211_rx_reorder_ampdu(local, skb, status)) __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3fb04a86444..63656266d56 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -772,7 +772,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdrlen = ieee80211_hdrlen(hdr->frame_control); /* internal error, why is TX_FRAGMENTED set? */ - if (WARN_ON(skb->len <= frag_threshold)) + if (WARN_ON(skb->len + FCS_LEN <= frag_threshold)) return TX_DROP; /* diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index deb4ecec122..959aa8379cc 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -417,6 +417,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_channel* chan = local->hw.conf.channel; + bool reconf = false; u32 reconf_flags = 0; int new_power_level; @@ -427,14 +428,38 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, if (!chan) return -EINVAL; - if (data->txpower.fixed) - new_power_level = min(data->txpower.value, chan->max_power); - else /* Automatic power level setting */ - new_power_level = chan->max_power; + /* only change when not disabling */ + if (!data->txpower.disabled) { + if (data->txpower.fixed) { + if (data->txpower.value < 0) + return -EINVAL; + new_power_level = data->txpower.value; + /* + * Debatable, but we cannot do a fixed power + * level above the regulatory constraint. + * Use "iwconfig wlan0 txpower 15dBm" instead. + */ + if (new_power_level > chan->max_power) + return -EINVAL; + } else { + /* + * Automatic power level setting, max being the value + * passed in from userland. + */ + if (data->txpower.value < 0) + new_power_level = -1; + else + new_power_level = data->txpower.value; + } + + reconf = true; - local->user_power_level = new_power_level; - if (local->hw.conf.power_level != new_power_level) - reconf_flags |= IEEE80211_CONF_CHANGE_POWER; + /* + * ieee80211_hw_config() will limit to the channel's + * max power and possibly power constraint from AP. + */ + local->user_power_level = new_power_level; + } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); @@ -442,7 +467,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (reconf_flags) + if (reconf || reconf_flags) ieee80211_hw_config(local, reconf_flags); return 0; @@ -530,7 +555,7 @@ static int ieee80211_ioctl_giwfrag(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); frag->value = local->fragmentation_threshold; - frag->disabled = (frag->value >= IEEE80211_MAX_RTS_THRESHOLD); + frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD); frag->fixed = 1; return 0; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bb279bf59a1..cb3ad741ebf 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -275,6 +275,8 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +endif # NF_CONNTRACK + # transparent proxy support config NETFILTER_TPROXY tristate "Transparent proxying support (EXPERIMENTAL)" @@ -290,8 +292,6 @@ config NETFILTER_TPROXY To compile it as a module, choose M here. If unsure, say N. -endif # NF_CONNTRACK - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -374,7 +374,7 @@ config NETFILTER_XT_TARGET_HL config NETFILTER_XT_TARGET_LED tristate '"LED" target support' - depends on LEDS_CLASS && LED_TRIGGERS + depends on LEDS_CLASS && LEDS_TRIGGERS depends on NETFILTER_ADVANCED help This option adds a `LED' target, which allows you to blink LEDs in @@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DEFRAG_IPV4 help This option adds a `socket' match, which can be used to match diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 60aba45023f..77bfdfeb966 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -260,7 +260,10 @@ struct ip_vs_conn *ip_vs_ct_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == af && ip_vs_addr_equal(af, s_addr, &cp->caddr) && - ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + /* protocol should only be IPPROTO_IP if + * d_addr is a fwmark */ + ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, + d_addr, &cp->vaddr) && s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && protocol == cp->protocol) { @@ -698,7 +701,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, cp->cport = cport; ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - ip_vs_addr_copy(af, &cp->daddr, daddr); + /* proto should only be IPPROTO_IP if d_addr is a fwmark */ + ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index cb3e031335e..8dddb17a947 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -278,7 +278,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, @@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_conn_new(svc->af, IPPROTO_IP, diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3940f996a2e..afde8f99164 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -372,7 +372,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; - int ret = 0; + int ret = 1; if (!master_help->helper) { ret = -ESHUTDOWN; @@ -412,41 +412,23 @@ out: return ret; } -int nf_ct_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report) { int ret; spin_lock_bh(&nf_conntrack_lock); ret = __nf_ct_expect_check(expect); - if (ret < 0) + if (ret <= 0) goto out; + ret = 0; nf_ct_expect_insert(expect); - atomic_inc(&expect->use); - spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event(IPEXP_NEW, expect); - nf_ct_expect_put(expect); - return ret; -out: spin_unlock_bh(&nf_conntrack_lock); + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_related); - -int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) -{ - int ret; - - spin_lock_bh(&nf_conntrack_lock); - ret = __nf_ct_expect_check(expect); - if (ret < 0) - goto out; - nf_ct_expect_insert(expect); out: spin_unlock_bh(&nf_conntrack_lock); - if (ret == 0) - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 30b8e9009f9..0fa5a422959 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -176,7 +176,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) + hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c6439c77953..c523f0b8cee 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -512,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -591,8 +591,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, group, -ENOBUFS); return NOTIFY_DONE; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -987,7 +988,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); - char *helpname; + char *helpname = NULL; int err; /* don't change helper of sibling connections */ @@ -1185,28 +1186,6 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) return 0; } -static inline void -ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) -{ - unsigned int events = 0; - - if (test_bit(IPS_EXPECTED_BIT, &ct->status)) - events |= IPCT_RELATED; - else - events |= IPCT_NEW; - - nf_conntrack_event_report(IPCT_STATUS | - IPCT_HELPER | - IPCT_REFRESH | - IPCT_PROTOINFO | - IPCT_NATSEQADJ | - IPCT_MARK | - events, - ct, - pid, - report); -} - static struct nf_conn * ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, @@ -1230,7 +1209,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], rcu_read_lock(); if (cda[CTA_HELP]) { - char *helpname; + char *helpname = NULL; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) @@ -1372,6 +1351,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { struct nf_conn *ct; + enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(cda, &otuple, &rtuple, u3); @@ -1382,9 +1362,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = 0; nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + events = IPCT_RELATED; + else + events = IPCT_NEW; + + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK | events, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1403,9 +1392,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1564,7 +1557,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -1589,8 +1582,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, 0, -ENOBUFS); return NOTIFY_DONE; } #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 50dac8dbe7d..aee0d6bea30 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -22,6 +22,7 @@ #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> static DEFINE_RWLOCK(dccp_lock); @@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); + if (new_state != old_state) + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); @@ -633,6 +637,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +650,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +668,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } @@ -777,6 +794,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .print_conntrack = dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e..97a6e93d742 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct, sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) + sender->td_maxack = ack; + } + /* * Update receiver data. */ @@ -919,6 +927,16 @@ static int tcp_packet(struct nf_conn *ct, return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET + && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) + && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid RST "); + return -NF_ACCEPT; + } + if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4614696c1b8..0badedc542d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb998fe098..beb37311e1a 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -36,10 +36,14 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int nf_log_register(u_int8_t pf, struct nf_logger *logger) { const struct nf_logger *llog; + int i; if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; + for (i = 0; i < ARRAY_SIZE(logger->list); i++) + INIT_LIST_HEAD(&logger->list[i]); + mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2785d66a7e3..b8ab37ad7ed 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -203,7 +203,7 @@ static int __init nfnetlink_init(void) nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -1; + return -ENOMEM; } return 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fd326ac27ec..66a6dd5c519 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf, + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + if (in && skb_mac_header_was_set(skb)) { + size += nla_total_size(skb->dev->hard_header_len) + + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + + nla_total_size(sizeof(u_int16_t)); /* hwlen */ + } + spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9..150e5cf62f8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c4847662b8..69a639f3540 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; - if (info->node_mask >= (1 << info->total_nodes)) { + if (info->total_nodes > XT_CLUSTER_NODES_MAX) { + printk(KERN_ERR "xt_cluster: you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); + return false; + } + if (info->node_mask >= (1ULL << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5b5369c30f..219dcdbe388 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v) if (!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) - return 1; + return -1; } return 0; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 791e030ea90..eb0ceb84652 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; + union nf_inet_addr addr = {}; int add; if (size > sizeof(buf)) @@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file, add = 1; break; } - addr = in_aton(c); + addr.ip = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); + e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, (const void *)&addr, - NFPROTO_IPV4, 0); + recent_entry_init(t, &addr, NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 834c6eb7f48..c0519139679 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -256,13 +256,11 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, { struct netlbl_af4list *entry; - entry = netlbl_af4list_search(addr, head); - if (entry != NULL && entry->addr == addr && entry->mask == mask) { - netlbl_af4list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af4list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af4list_remove_entry(entry); + return entry; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -299,15 +297,11 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, { struct netlbl_af6list *entry; - entry = netlbl_af6list_search(addr, head); - if (entry != NULL && - ipv6_addr_equal(&entry->addr, addr) && - ipv6_addr_equal(&entry->mask, mask)) { - netlbl_af6list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af6list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af6list_remove_entry(entry); + return entry; } #endif /* IPv6 */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4e705f87969..3be0e016ab7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1084,8 +1084,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, /* Build a packet - the conventional user limit is 236 bytes. We can do ludicrously large NetROM frames but must not overflow */ - if (len > 65536) - return -EMSGSIZE; + if (len > 65536) { + err = -EMSGSIZE; + goto out; + } SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 74776de523e..f546e81acc4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1758,8 +1758,9 @@ static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) static inline char *alloc_one_pg_vec_page(unsigned long order) { - return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, - order); + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + + return (char *) __get_free_pages(gfp_flags, order); } static char **alloc_pg_vec(struct tpacket_req *req, int order) diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e..71794449ca4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0f36e8d59b2..877a7f65f70 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,10 +1072,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; - /* ROSE empty frame has no meaning : don't send */ - if (len == 0) - return 0; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1273,12 +1269,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; - /* ROSE empty frame has no meaning : ignore it */ - if (copied == 0) { - skb_free_datagram(sk, skb); - return copied; - } - if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d28..67e38a05624 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 173fcc4b050..09cdcdfe7e9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) unsigned long cl; unsigned long fh; int err; + int tp_created = 0; if (net != &init_net) return -EINVAL; @@ -254,7 +255,7 @@ replay: } tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : tcf_auto_prio(*back); + tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; @@ -266,10 +267,7 @@ replay: goto errout; } - spin_lock_bh(root_lock); - tp->next = *back; - *back = tp; - spin_unlock_bh(root_lock); + tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -296,8 +294,11 @@ replay: switch (n->nlmsg_type) { case RTM_NEWTFILTER: err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) { + if (tp_created) + tcf_destroy(tp); goto errout; + } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); @@ -314,8 +315,18 @@ replay: } err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); - if (err == 0) + if (err == 0) { + if (tp_created) { + spin_lock_bh(root_lock); + tp->next = *back; + *back = tp; + spin_unlock_bh(root_lock); + } tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + } else { + if (tp_created) + tcf_destroy(tp); + } errout: if (cl) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 91a3db4a76f..cc29b44b150 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = tp->root; - struct cgroup_cls_state *cs; - int ret = 0; + u32 classid; /* * Due to the nature of the classifier it is required to ignore all @@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; rcu_read_lock(); - cs = task_cls_state(current); - if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { - res->classid = cs->classid; - res->class = 0; - ret = tcf_exts_exec(skb, &head->exts, res); - } else - ret = -1; - + classid = task_cls_state(current)->classid; rcu_read_unlock(); - return ret; + if (!classid) + return -1; + + if (!tcf_em_tree_match(skb, &head->ematches, NULL)) + return -1; + + res->classid = classid; + res->class = 0; + return tcf_exts_exec(skb, &head->exts, res); } static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 72cf86e3c09..fad596bf32d 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,8 +176,10 @@ META_COLLECTOR(var_dev) META_COLLECTOR(int_vlan_tag) { - unsigned short uninitialized_var(tag); - if (vlan_get_tag(skb, &tag) < 0) + unsigned short tag; + + tag = vlan_tx_tag_get(skb); + if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else dst->value = tag; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 92cfc9d7e3b..69188e8358b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -51,7 +51,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= qdisc_dev(sch)->mtu; + limit *= psched_mtu(qdisc_dev(sch)); q->limit = limit; } else { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d876b873484..2b88295cb7b 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -280,6 +280,14 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (unlikely(!skb)) return NULL; +#ifdef CONFIG_NET_CLS_ACT + /* + * If it's at ingress let's pretend the delay is + * from the network (tstamp will be updated). + */ + if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + skb->tstamp.tv64 = 0; +#endif pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ec697cebb63..3b641829723 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -303,6 +303,8 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); + if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { @@ -310,8 +312,7 @@ restart: master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += - qdisc_pkt_len(skb); + master->stats.tx_bytes += length; return 0; } __netif_tx_unlock(slave_txq); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c1..9d504234af4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -345,6 +345,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; release_sock(sock->sk); #endif } @@ -796,6 +797,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); + if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) + /* sndbuf needs to have room for one request + * per thread, otherwise we can stall even when the + * network isn't a bottleneck. + * + * We count all threads rather than threads in a + * particular pool, which provides an upper bound + * on the number of threads which will access the socket. + * + * rcvbuf just needs to be able to hold a few requests. + * Normally they will be removed from the queue + * as soon a a complete request arrives. + */ + svc_sock_setbufsize(svsk->sk_sock, + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + 3 * serv->sv_max_mesg); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* Receive data. If we haven't got the record length yet, get @@ -1043,6 +1061,15 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + /* initialise setting must have enough space to + * receive and respond to one request. + * svc_tcp_recvfrom will re-adjust if necessary + */ + svc_sock_setbufsize(svsk->sk_sock, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); + + set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1112,14 +1139,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else { - /* initialise setting must have enough space to - * receive and respond to one request. - */ - svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, - 4 * serv->sv_max_mesg); + else svc_tcp_init(svsk, serv); - } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f162..06ca058572f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 629a28764da..42a6f9f2028 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 6c26a675435..f11be72a1a8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) @@ -183,6 +184,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, fatal_err: printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + vec->frmr = NULL; svc_rdma_put_frmr(xprt, frmr); return -EIO; } @@ -516,6 +518,7 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -530,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma, clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ + ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, - page, 0, PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_dma_lkey; - /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); @@ -606,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma, return 0; err: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3d810e7df3f..5151f9f6c57 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_page(xprt->sc_cm_id->device, - page, 0, PAGE_SIZE, + pa = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { - svc_xprt_put(&xprt->sc_xprt); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); } return ret; @@ -1314,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, PAGE_SIZE, DMA_FROM_DEVICE); + sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { put_page(p); return; @@ -1342,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_page(xprt->sc_cm_id->device, + ib_dma_unmap_single(xprt->sc_cm_id->device, sge.addr, PAGE_SIZE, DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b21e0cc5e6..465aafc2007 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; frmr_wr.wr.fast_reg.access_flags = (writing ? - IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887a..e1859614601 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index 5d149c1b5f0..9ad4d893a56 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -149,7 +149,8 @@ struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev, } result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg); if (result < 0) { - dev_err(dev, "no memory to add payload in attribute\n"); + dev_err(dev, "no memory to add payload (msg %p size %zu) in " + "attribute: %d\n", msg, size, result); goto error_nla_put; } genlmsg_end(skb, genl_msg); @@ -299,10 +300,10 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, struct sk_buff *skb; skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags); - if (skb == NULL) - goto error_msg_new; - result = wimax_msg_send(wimax_dev, skb); -error_msg_new: + if (IS_ERR(skb)) + result = PTR_ERR(skb); + else + result = wimax_msg_send(wimax_dev, skb); return result; } EXPORT_SYMBOL_GPL(wimax_msg); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a0ee76b5251..933e1422b09 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -338,8 +338,21 @@ out: */ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) { + /* + * A driver cannot take the wimax_dev out of the + * __WIMAX_ST_NULL state unless by calling wimax_dev_add(). If + * the wimax_dev's state is still NULL, we ignore any request + * to change its state because it means it hasn't been yet + * registered. + * + * There is no need to complain about it, as routines that + * call this might be shared from different code paths that + * are called before or after wimax_dev_add() has done its + * job. + */ mutex_lock(&wimax_dev->mutex); - __wimax_state_change(wimax_dev, new_state); + if (wimax_dev->state > __WIMAX_ST_NULL) + __wimax_state_change(wimax_dev, new_state); mutex_unlock(&wimax_dev->mutex); return; } @@ -376,7 +389,7 @@ EXPORT_SYMBOL_GPL(wimax_state_get); void wimax_dev_init(struct wimax_dev *wimax_dev) { INIT_LIST_HEAD(&wimax_dev->id_table_node); - __wimax_state_set(wimax_dev, WIMAX_ST_UNINITIALIZED); + __wimax_state_set(wimax_dev, __WIMAX_ST_NULL); mutex_init(&wimax_dev->mutex); mutex_init(&wimax_dev->mutex_reset); } diff --git a/net/wireless/core.h b/net/wireless/core.h index d43daa236ef..0a592e4295f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -90,7 +90,7 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; - bool hold; + bool hold, ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 353e1a4ece8..2456e4ee445 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3334,7 +3334,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!msg) return; @@ -3353,7 +3353,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6327e1617ac..487cb627ddb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -907,6 +907,7 @@ EXPORT_SYMBOL(freq_reg_info); int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { + assert_cfg80211_lock(); return freq_reg_info_regd(wiphy, center_freq, bandwidth, reg_rule, NULL); } @@ -1133,7 +1134,8 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) if (is_world_regdom(cfg80211_regdomain->alpha2) || (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) return true; - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + if (last_request && + last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && wiphy->custom_regulatory) return true; return false; @@ -1142,6 +1144,12 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { + /* + * Means we are just firing up cfg80211, so no beacons would + * have been processed yet. + */ + if (!last_request) + return; if (!reg_is_world_roaming(wiphy)) return; wiphy_update_beacon_reg(wiphy); @@ -1176,6 +1184,8 @@ static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + assert_cfg80211_lock(); + sband = wiphy->bands[band]; BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; @@ -1214,10 +1224,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { enum ieee80211_band band; + + mutex_lock(&cfg80211_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band_custom(wiphy, band, regd); } + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); @@ -1423,7 +1436,7 @@ new_request: return call_crda(last_request->alpha2); } -/* This currently only processes user and driver regulatory hints */ +/* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; @@ -1538,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } @@ -2095,11 +2115,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { - struct wiphy *request_wiphy; + struct wiphy *request_wiphy = NULL; assert_cfg80211_lock(); - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (last_request) + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); kfree(wiphy->regd); if (!last_request || !request_wiphy) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2a00e362f5f..1f260c40b6c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -58,6 +58,10 @@ static void bss_release(struct kref *ref) bss = container_of(ref, struct cfg80211_internal_bss, ref); if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); + + if (bss->ies_allocated) + kfree(bss->pub.information_elements); + kfree(bss); } @@ -360,19 +364,42 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); - if (found && overwrite) { - list_replace(&found->list, &res->list); - rb_replace_node(&found->rbn, &res->rbn, - &dev->bss_tree); - kref_put(&found->ref, bss_release); - found = res; - } else if (found) { + if (found) { kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; found->pub.capability = res->pub.capability; found->ts = res->ts; + + /* overwrite IEs */ + if (overwrite) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_information_elements; + + if (ksize(found) >= used + ielen) { + memcpy(found->pub.information_elements, + res->pub.information_elements, ielen); + found->pub.len_information_elements = ielen; + } else { + u8 *ies = found->pub.information_elements; + + if (found->ies_allocated) { + if (ksize(ies) < ielen) + ies = krealloc(ies, ielen, + GFP_ATOMIC); + } else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.information_elements, ielen); + found->ies_allocated = true; + found->pub.information_elements = ies; + found->pub.len_information_elements = ielen; + } + } + } + kref_put(&res->ref, bss_release); } else { /* this "consumes" the reference */ diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d8..0e59f9ae9b8 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 82271720d97..5f1f86565f1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); - unsigned int h; + unsigned int h, h_wildcard; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; @@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (best) goto found; - h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && |