diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 09:13:04 +0200 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-10-19 09:13:04 +0200 |
commit | fa251f89903d73989e2f63e13d0eaed1e07ce0da (patch) | |
tree | 3f7fe779941e3b6d67754dd7c44a32f48ea47c74 /net | |
parent | dd3932eddf428571762596e17b65f5dc92ca361b (diff) | |
parent | cd07202cc8262e1669edff0d97715f3dd9260917 (diff) |
Merge branch 'v2.6.36-rc8' into for-2.6.37/barrier
Conflicts:
block/blk-core.c
drivers/block/loop.c
mm/swapfile.c
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'net')
100 files changed, 757 insertions, 574 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 01ddb0472f8..0eb96f7e44b 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -24,8 +24,11 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (vlan_dev) skb->dev = vlan_dev; - else if (vlan_id) - goto drop; + else if (vlan_id) { + if (!(skb->dev->flags & IFF_PROMISC)) + goto drop; + skb->pkt_type = PACKET_OTHERHOST; + } return (polling ? netif_receive_skb(skb) : netif_rx(skb)); @@ -102,8 +105,11 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, if (vlan_dev) skb->dev = vlan_dev; - else if (vlan_id) - goto drop; + else if (vlan_id) { + if (!(skb->dev->flags & IFF_PROMISC)) + goto drop; + skb->pkt_type = PACKET_OTHERHOST; + } for (p = napi->gro_list; p; p = p->next) { NAPI_GRO_CB(p)->same_flow = diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3d59c9bf8fe..3bccdd12a26 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -510,7 +510,8 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); - netif_carrier_on(dev); + if (netif_carrier_ok(real_dev)) + netif_carrier_on(dev); return 0; clear_allmulti: diff --git a/net/9p/client.c b/net/9p/client.c index dc6f2f26d02..9eb72505308 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -331,8 +331,10 @@ static void p9_tag_cleanup(struct p9_client *c) } } - if (c->tagpool) + if (c->tagpool) { + p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */ p9_idpool_destroy(c->tagpool); + } /* free requests associated with tags */ for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { @@ -944,6 +946,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int16_t nwqids, count; err = 0; + wqids = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); @@ -994,9 +997,11 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, else fid->qid = oldfid->qid; + kfree(wqids); return fid; clunk_fid: + kfree(wqids); p9_client_clunk(fid); fid = NULL; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 0ea20c30466..17c5ba7551a 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -426,8 +426,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); - if (!rpl_context) + if (!rpl_context) { + err = -ENOMEM; goto err_close; + } /* * If the request has a buffer, steal it, otherwise @@ -445,8 +447,8 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) } rpl_context->rc = req->rc; if (!rpl_context->rc) { - kfree(rpl_context); - goto err_close; + err = -ENOMEM; + goto err_free2; } /* @@ -458,11 +460,8 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) */ if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { err = post_recv(client, rpl_context); - if (err) { - kfree(rpl_context->rc); - kfree(rpl_context); - goto err_close; - } + if (err) + goto err_free1; } else atomic_dec(&rdma->rq_count); @@ -471,8 +470,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) /* Post the request */ c = kmalloc(sizeof *c, GFP_KERNEL); - if (!c) - goto err_close; + if (!c) { + err = -ENOMEM; + goto err_free1; + } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, @@ -499,9 +500,15 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) return ib_post_send(rdma->qp, &wr, &bad_wr); error: + kfree(c); + kfree(rpl_context->rc); + kfree(rpl_context); P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); return -EIO; - + err_free1: + kfree(rpl_context->rc); + err_free2: + kfree(rpl_context); err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index dcfbe99ff81..b88515936e4 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -329,7 +329,8 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { - if (!strncmp(devname, chan->tag, chan->tag_len)) { + if (!strncmp(devname, chan->tag, chan->tag_len) && + strlen(devname) == chan->tag_len) { if (!chan->inuse) { chan->inuse = true; found = 1; diff --git a/net/Kconfig b/net/Kconfig index e330594d370..e926884c167 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,7 +217,7 @@ source "net/dns_resolver/Kconfig" config RPS boolean - depends on SMP && SYSFS + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y menu "Network testing" diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 651babdfab3..ad2b232a205 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -399,12 +399,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) unregister_netdev(net_dev); free_netdev(net_dev); } - read_lock_irq(&devs_lock); - if (list_empty(&br2684_devs)) { - /* last br2684 device */ - unregister_atmdevice_notifier(&atm_dev_notifier); - } - read_unlock_irq(&devs_lock); return; } @@ -675,7 +669,6 @@ static int br2684_create(void __user *arg) if (list_empty(&br2684_devs)) { /* 1st br2684 device */ - register_atmdevice_notifier(&atm_dev_notifier); brdev->number = 1; } else brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; @@ -815,6 +808,7 @@ static int __init br2684_init(void) return -ENOMEM; #endif register_atm_ioctl(&br2684_ioctl_ops); + register_atmdevice_notifier(&atm_dev_notifier); return 0; } @@ -830,9 +824,7 @@ static void __exit br2684_exit(void) #endif - /* if not already empty */ - if (!list_empty(&br2684_devs)) - unregister_atmdevice_notifier(&atm_dev_notifier); + unregister_atmdevice_notifier(&atm_dev_notifier); while (!list_empty(&br2684_devs)) { net_dev = list_entry_brdev(br2684_devs.next); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 622b471e14e..74bcc662c3d 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -778,7 +778,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) eg->packets_rcvd++; mpc->eg_ops->put(eg); - memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); + memset(ATM_SKB(new_skb), 0, sizeof(struct atm_skb_data)); netif_rx(new_skb); } diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 2ce79df0068..c7d81436213 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -112,8 +112,8 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); - sock_put(sk); bh_unlock_sock(sk); + sock_put(sk); } else ax25_destroy_socket(ax25); return; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index fadf26b4ed7..0b54b7dd840 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1441,33 +1441,23 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) static void l2cap_streaming_send(struct sock *sk) { - struct sk_buff *skb, *tx_skb; + struct sk_buff *skb; struct l2cap_pinfo *pi = l2cap_pi(sk); u16 control, fcs; - while ((skb = sk->sk_send_head)) { - tx_skb = skb_clone(skb, GFP_ATOMIC); - - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + while ((skb = skb_dequeue(TX_QUEUE(sk)))) { + control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE); control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE); if (pi->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + fcs = crc16(0, (u8 *)skb->data, skb->len - 2); + put_unaligned_le16(fcs, skb->data + skb->len - 2); } - l2cap_do_send(sk, tx_skb); + l2cap_do_send(sk, skb); pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; - - if (skb_queue_is_last(TX_QUEUE(sk), skb)) - sk->sk_send_head = NULL; - else - sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb); - - skb = skb_dequeue(TX_QUEUE(sk)); - kfree_skb(skb); } } @@ -1960,6 +1950,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us switch (optname) { case L2CAP_OPTIONS: + if (sk->sk_state == BT_CONNECTED) { + err = -EINVAL; + break; + } + opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; @@ -2771,10 +2766,10 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, case L2CAP_CONF_MTU: if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; - pi->omtu = L2CAP_DEFAULT_MIN_MTU; + pi->imtu = L2CAP_DEFAULT_MIN_MTU; } else - pi->omtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + pi->imtu = val; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); break; case L2CAP_CONF_FLUSH_TO: @@ -3071,6 +3066,17 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd return 0; } +static inline void set_default_fcs(struct l2cap_pinfo *pi) +{ + /* FCS is enabled only in ERTM or streaming mode, if one or both + * sides request it. + */ + if (pi->mode != L2CAP_MODE_ERTM && pi->mode != L2CAP_MODE_STREAMING) + pi->fcs = L2CAP_FCS_NONE; + else if (!(pi->conf_state & L2CAP_CONF_NO_FCS_RECV)) + pi->fcs = L2CAP_FCS_CRC16; +} + static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; @@ -3088,14 +3094,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (!sk) return -ENOENT; - if (sk->sk_state != BT_CONFIG) { - struct l2cap_cmd_rej rej; - - rej.reason = cpu_to_le16(0x0002); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + if (sk->sk_state == BT_DISCONN) goto unlock; - } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); @@ -3135,9 +3135,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto unlock; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) || - l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) - l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; + set_default_fcs(l2cap_pi(sk)); sk->sk_state = BT_CONNECTED; @@ -3225,9 +3223,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) || - l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) - l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; + set_default_fcs(l2cap_pi(sk)); sk->sk_state = BT_CONNECTED; l2cap_pi(sk)->next_tx_seq = 0; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 44a62327595..194b3a04cfd 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -82,11 +82,14 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb) static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) { struct sock *sk = d->owner, *parent; + unsigned long flags; + if (!sk) return; BT_DBG("dlc %p state %ld err %d", d, d->state, err); + local_irq_save(flags); bh_lock_sock(sk); if (err) @@ -108,6 +111,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) } bh_unlock_sock(sk); + local_irq_restore(flags); if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop DLC lock here, otherwise diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2c911c0759c..137f23259a9 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -162,8 +162,8 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) if (tmp) { memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); atomic_set(&tmp->use, 1); - nf_bridge_put(nf_bridge); } + nf_bridge_put(nf_bridge); nf_bridge = tmp; } return nf_bridge; @@ -761,9 +761,11 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) + !skb_is_gso(skb)) { + /* BUG: Should really parse the IP options here. */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); return ip_fragment(skb, br_dev_queue_push_xmit); - else + } else return br_dev_queue_push_xmit(skb); } #else diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 8ce90478611..4bf28f25f36 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -827,6 +827,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, long timeo; int err; int ifindex, headroom, tailroom; + unsigned int mtu; struct net_device *dev; lock_sock(sk); @@ -896,15 +897,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, cf_sk->sk.sk_state = CAIF_DISCONNECTED; goto out; } - dev = dev_get_by_index(sock_net(sk), ifindex); + + err = -ENODEV; + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + rcu_read_unlock(); + goto out; + } cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom); + mtu = dev->mtu; + rcu_read_unlock(); + cf_sk->tailroom = tailroom; - cf_sk->maxframe = dev->mtu - (headroom + tailroom); - dev_put(dev); + cf_sk->maxframe = mtu - (headroom + tailroom); if (cf_sk->maxframe < 1) { - pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n", - __func__, dev->mtu); - err = -ENODEV; + pr_warning("CAIF: %s(): CAIF Interface MTU too small (%u)\n", + __func__, mtu); goto out; } diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index eb1602022ac..9a699242d10 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> -#include <linux/unaligned/le_byteshift.h> +#include <asm/unaligned.h> #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83..660dd41aaaa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2058,16 +2058,16 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; - struct sock *sk = skb->sk; + const struct net_device_ops *ops = dev->netdev_ops; - queue_index = sk_tx_queue_get(sk); - if (queue_index < 0) { - const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + struct sock *sk = skb->sk; + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { - if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { queue_index = 0; if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); @@ -4845,7 +4845,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); + rcu_barrier(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 7a85367b3c2..8451ab48109 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -348,7 +348,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) - rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + rule_buf = kzalloc(info.rule_cnt * sizeof(u32), GFP_USER); if (!rule_buf) return -ENOMEM; @@ -397,7 +397,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) return -ENOMEM; full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; - indir = kmalloc(full_size, GFP_USER); + indir = kzalloc(full_size, GFP_USER); if (!indir) return -ENOMEM; @@ -538,7 +538,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; @@ -775,7 +775,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = kmalloc(reglen, GFP_USER); + regbuf = kzalloc(reglen, GFP_USER); if (!regbuf) return -ENOMEM; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9fbe7f7429b..6743146e4d6 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -232,7 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -243,7 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return 0; } @@ -270,7 +270,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, { struct gen_estimator *e; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -281,7 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -320,9 +320,9 @@ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, ASSERT_RTNL(); - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); res = gen_find_node(bstats, rate_est) != NULL; - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return res; } diff --git a/net/core/iovec.c b/net/core/iovec.c index 1cd98df412d..e6b133b77cc 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,9 +35,10 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, err, ct; + int size, ct; + long err; if (m->msg_namelen) { if (mode == VERIFY_READ) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c..c83b421341c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2573,6 +2573,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; + /* nskb and skb might have different headroom */ + if (nskb->ip_summed == CHECKSUM_PARTIAL) + nskb->csum_start += skb_headroom(nskb) - headroom; + skb_reset_mac_header(nskb); skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + @@ -2703,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) return -E2BIG; headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); + nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); if (unlikely(!nskb)) return -ENOMEM; diff --git a/net/core/sock.c b/net/core/sock.c index b05b9b6ddb8..ef30e9d286e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1351,9 +1351,9 @@ int sock_i_uid(struct sock *sk) { int uid; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return uid; } EXPORT_SYMBOL(sock_i_uid); @@ -1362,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return ino; } EXPORT_SYMBOL(sock_i_ino); diff --git a/net/core/stream.c b/net/core/stream.c index d959e0f4152..f5df85dcd20 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -141,10 +141,10 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, !sk->sk_err && - !(sk->sk_shutdown & SEND_SHUTDOWN) && - sk_stream_memory_free(sk) && - vm_wait); + sk_wait_event(sk, ¤t_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && + !vm_wait)); sk->sk_write_pending--; if (vm_wait) { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d19124..7cd7760144f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -46,7 +46,7 @@ config IP_ADVANCED_ROUTER rp_filter on use: echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter - and + or echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter Note that some distributions enable it in startup scripts. @@ -217,6 +217,7 @@ config NET_IPIP config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on IPV6 || IPV6=n help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -412,7 +413,7 @@ config INET_XFRM_MODE_BEET If unsure, say Y. config INET_LRO - bool "Large Receive Offload (ipv4/tcp)" + tristate "Large Receive Offload (ipv4/tcp)" default y ---help--- Support for Large Receive Offload (ipv4/tcp). diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7..721a8a37b45 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a4396891835..7d02a9f999f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -246,6 +246,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct fib_result res; int no_addr, rpf, accept_local; + bool dev_match; int ret; struct net *net; @@ -273,12 +274,22 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); + dev_match = false; + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } #else if (FIB_RES_DEV(res) == dev) + dev_match = true; #endif - { + if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; fib_res_put(&res); return ret; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 79d057a939b..4a8e370862b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -186,7 +186,9 @@ static inline struct tnode *node_parent_rcu(struct node *node) { struct tnode *ret = node_parent(node); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } /* Same as rcu_assign_pointer @@ -1753,7 +1755,9 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) static struct leaf *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *) rcu_dereference(t->trie); + struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); if (!n) return NULL; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a1ad0e7180d..2a4bb76f213 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -856,6 +856,18 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, igmpv3_clear_delrec(in_dev); } else if (len < 12) { return; /* ignore bogus packet; freed by caller */ + } else if (IGMP_V1_SEEN(in_dev)) { + /* This is a v3 query with v1 queriers present */ + max_delay = IGMP_Query_Response_Interval; + group = 0; + } else if (IGMP_V2_SEEN(in_dev)) { + /* this is a v3 query with v2 queriers present; + * Interpretation of the max_delay code is problematic here. + * A real v2 host would use ih_code directly, while v3 has a + * different encoding. We use the v3 encoding as more likely + * to be intended in a v3 query. + */ + max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad5..35c93e8b6a4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -45,7 +45,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -699,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; @@ -774,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -850,7 +850,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5..7649d775007 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * we can switch to copy when see the first bad fragment. */ if (skb_has_frags(skb)) { - struct sk_buff *frag; + struct sk_buff *frag, *frag2; int first_len = skb_pagelen(skb); - int truesizes = 0; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } - truesizes += frag->truesize; + skb->truesize -= frag->truesize; } /* Everything is OK. Generate! */ @@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); @@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6c40a8c46e7..64b70ad162e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1129,6 +1129,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_HDRINCL: val = inet->hdrincl; break; + case IP_NODEFRAG: + val = inet->nodefrag; + break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 51d6c316797..e8f4f9a57f1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1420,6 +1420,9 @@ static int translate_compat_table(const char *name, if (ret != 0) break; ++i; + if (strcmp(arpt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 97b64b22c41..d163f2e3b2e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1751,6 +1751,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ipt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b254dafaf42..43eec80c0e7 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -112,6 +112,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->protocol = htons(ETH_P_IP); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index eab8de32f20..f3a9b42b16c 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -66,9 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(skb->sk); - if (inet && inet->nodefrag) + if (sk && (sk->sk_family == PF_INET) && + inet->nodefrag) return NF_ACCEPT; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 1679e2c0963..ee5f419d0a5 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -893,13 +893,15 @@ static void fast_csum(__sum16 *csum, unsigned char s[4]; if (offset & 1) { - s[0] = s[2] = 0; + s[0] = ~0; s[1] = ~*optr; + s[2] = 0; s[3] = *nptr; } else { - s[1] = s[3] = 0; s[0] = ~*optr; + s[1] = ~0; s[2] = *nptr; + s[3] = 0; } *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f56b6e6c6a..ac6559cb54f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1231,7 +1231,7 @@ restart: } if (net_ratelimit()) - printk(KERN_WARNING "Neighbour table overflow.\n"); + printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); rt_drop(rt); return -ENOBUFS; } @@ -2738,6 +2738,11 @@ slow_output: } EXPORT_SYMBOL_GPL(__ip_route_output_key); +static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2746,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, - .check = ipv4_dst_check, + .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea7..f115ea68a4e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -386,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. But poll() doesn't @@ -451,11 +449,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) mask |= POLLOUT | POLLWRNORM; } - } + } else + mask |= POLLOUT | POLLWRNORM; if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } EXPORT_SYMBOL(tcp_poll); @@ -939,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = sk->sk_route_caps & NETIF_F_SG; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; @@ -2011,11 +2015,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -3212,7 +3213,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long nr_pages, limit; - int order, i, max_share; + int i, max_share, cnt; unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3261,22 +3262,12 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - /* Try to be a bit smarter and adjust defaults depending - * on available memory. - */ - for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); - order++) - ; - if (order >= 4) { - tcp_death_row.sysctl_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096 << (order - 4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); - sysctl_tcp_max_orphans >>= (3 - order); - sysctl_max_syn_backlog = 128; - } + + cnt = tcp_hashinfo.ehash_mask + 1; + + tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + sysctl_tcp_max_orphans = cnt / 2; + sysctl_max_syn_backlog = max(128, cnt / 256); /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 0ec9bd0ae94..850c737e08e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -196,10 +196,10 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; - char *clone, *name; + char *saved_clone, *clone, *name; int ret = 0; - clone = kstrdup(val, GFP_USER); + saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; @@ -226,6 +226,7 @@ int tcp_set_allowed_congestion_control(char *val) } out: spin_unlock(&tcp_cong_list_lock); + kfree(saved_clone); return ret; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef..b55f60f6fcb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2545,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if (tcp_is_sack(tp) || (oldcnt >= packets)) + if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (oldcnt >= packets)) break; mss = skb_shinfo(skb)->gso_size; @@ -4048,6 +4049,8 @@ static void tcp_reset(struct sock *sk) default: sk->sk_err = ECONNRESET; } + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f..74c54b30600 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); @@ -135,13 +135,16 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) /* This function calculates a "timeout" which is equivalent to the timeout of a * TCP connection after "boundary" unsuccessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN. + * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if + * syn_set flag is set. */ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + bool syn_set) { unsigned int timeout, linear_backoff_thresh; unsigned int start_ts; + unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,12 +154,12 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + timeout = ((2 << boundary) - 1) * rto_base; else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; return (tcp_time_stamp - start_ts) >= timeout; @@ -167,14 +170,15 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; - bool do_reset; + bool do_reset, syn_set = 0; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + syn_set = 1; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +191,14 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, syn_set)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +440,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0..fb23c2e63b5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 869078d4eeb..a580349f0b8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -61,7 +61,7 @@ static int xfrm4_get_saddr(struct net *net, static int xfrm4_get_tos(struct flowi *fl) { - return fl->fl4_tos; + return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366a0a0..47947624ecc 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +{ + sel->daddr.a4 = fl->fl4_dst; + sel->saddr.a4 = fl->fl4_src; + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) { - x->sel.daddr.a4 = fl->fl4_dst; - x->sel.saddr.a4 = fl->fl4_src; - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; x->id = tmpl->id; if (x->id.daddr.a4 == 0) x->id.daddr.a4 = daddr->a4; @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab70a3fbcaf..324fac3b6c1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4637,10 +4637,12 @@ int __init addrconf_init(void) if (err < 0) { printk(KERN_CRIT "IPv6 Addrconf:" " cannot initialize default policy table: %d.\n", err); - return err; + goto out; } - register_pernet_subsys(&addrconf_ops); + err = register_pernet_subsys(&addrconf_ops); + if (err < 0) + goto out_addrlabel; /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup @@ -4692,7 +4694,9 @@ errout: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); - +out_addrlabel: + ipv6_addr_label_cleanup(); +out: return err; } @@ -4703,6 +4707,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); + ipv6_addr_label_cleanup(); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774cea38..8175f802651 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void) return register_pernet_subsys(&ipv6_addr_label_ops); } +void ipv6_addr_label_cleanup(void) +{ + unregister_pernet_subsys(&ipv6_addr_label_ops); +} + static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc..ef371aa01ac 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee..980912ed7a3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb_has_frags(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); @@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IPSTATS_MIB_FRAGFAILS); dst_release(&rt->dst); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 29a7bca29e3..8e754be92c2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1766,6 +1766,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ip6t_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf..578f3c1a16d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -113,14 +113,6 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf_init_frags.mem); - nf_skb_free(skb); - kfree_skb(skb); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -282,66 +274,22 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - if (end <= offset) { - pr_debug("overlap\n"); - goto err; - } - if (!pskb_pull(skb, i)) { - pr_debug("Can't pull\n"); - goto err; - } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - while (next && NFCT_FRAG6_CB(next)->offset < end) { - /* overlap is 'i' bytes */ - int i = end - NFCT_FRAG6_CB(next)->offset; - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - pr_debug("Eat head of the overlapped parts.: %d", i); - if (!pskb_pull(next, i)) - goto err; - /* next fragment */ - NFCT_FRAG6_CB(next)->offset += i; - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragmnet is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && NFCT_FRAG6_CB(next)->offset < end) + goto discard_fq; NFCT_FRAG6_CB(skb)->offset = offset; @@ -371,6 +319,8 @@ found: write_unlock(&nf_frags.lock); return 0; +discard_fq: + fq_kill(fq); err: return -1; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b75..64cfef1b0a4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -149,13 +149,6 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - void ip6_frag_init(struct inet_frag_queue *q, void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); @@ -346,58 +339,22 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; @@ -436,6 +393,8 @@ found: write_unlock(&ip6_frags.lock); return -1; +discard_fq: + fq_kill(fq); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365ac04..a275c6e1e25 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -670,7 +670,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING - "Neighbour table overflow.\n"); + "ipv6: Neighbour table overflow.\n"); dst_free(&rt->dst); return NULL; } @@ -1556,14 +1556,13 @@ out: * i.e. Path MTU discovery */ -void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) +static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, + struct net *net, u32 pmtu, int ifindex) { struct rt6_info *rt, *nrt; - struct net *net = dev_net(dev); int allfrag = 0; - rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; @@ -1631,6 +1630,27 @@ out: dst_release(&rt->dst); } +void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, + struct net_device *dev, u32 pmtu) +{ + struct net *net = dev_net(dev); + + /* + * RFC 1981 states that a node "MUST reduce the size of the packets it + * is sending along the path" that caused the Packet Too Big message. + * Since it's not possible in the general case to determine which + * interface was used to send the original packet, we update the MTU + * on the interface that will be used to send future packets. We also + * update the MTU on the interface that received the Packet Too Big in + * case the original packet was forced out that interface with + * SO_BINDTODEVICE or similar. This is the next best thing to the + * correct behaviour, which would be to update the MTU on all + * interfaces. + */ + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); +} + /* * Misc support functions */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead..5acb3560ff1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77fa0e..a67575d472a 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include <net/addrconf.h> static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 79986a674f6..fd55b5135de 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); if (err < 0) { - kfree(self->ias_obj->name); - kfree(self->ias_obj); + irias_delete_object(self->ias_obj); + self->ias_obj = NULL; goto out; } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index a788f9e9427..6130f9d9dbe 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1102,7 +1102,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */ le16_to_cpus(&val_len); n+=2; - if (val_len > 1016) { + if (val_len >= 1016) { IRDA_DEBUG(2, "%s(), parameter length to long\n", __func__ ); return -RSP_INVALID_COMMAND_FORMAT; } diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 9616c32d107..5bb8353105c 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -169,6 +169,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, { struct irlan_cb *self = netdev_priv(dev); int ret; + unsigned int len; /* skb headroom large enough to contain all IrDA-headers? */ if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) { @@ -188,6 +189,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, dev->trans_start = jiffies; + len = skb->len; /* Now queue the packet in the transport layer */ if (self->use_udata) ret = irttp_udata_request(self->tsap_data, skb); @@ -209,7 +211,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, self->stats.tx_dropped++; } else { self->stats.tx_packets++; - self->stats.tx_bytes += skb->len; + self->stats.tx_bytes += len; } return NETDEV_TX_OK; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 58c6c4cda73..1ae697681bc 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, printk("\n"); } - if (data_len < ETH_HLEN) + if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) goto error; secpath_reset(skb); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 023ba820236..58261299821 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1024,7 +1024,8 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = -EINVAL, opt; + unsigned int opt; + int rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index e4dae0244d7..cf4aea3ba30 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -689,7 +689,7 @@ static void llc_station_rcv(struct sk_buff *skb) int __init llc_station_init(void) { - u16 rc = -ENOBUFS; + int rc = -ENOBUFS; struct sk_buff *skb; struct llc_station_state_ev *ev; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c893f236ace..8f23401832b 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -175,6 +175,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + del_timer_sync(&tid_tx->addba_resp_timer); + /* * After this packets are no longer handed right through * to the driver but are put onto tid_tx->pending instead, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 798a91b100c..ded5c3843e0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -732,6 +732,12 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); + /* + * Now all work items will be gone, but the + * timer might still be armed, so delete it + */ + del_timer_sync(&local->work_timer); + cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fa0f37e4afe..28624282c5f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2199,9 +2199,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - if (status->flag & RX_FLAG_INTERNAL_CMTR) - goto out_free_skb; - if (skb_headroom(skb) < sizeof(*rthdr) && pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) goto out_free_skb; @@ -2260,7 +2257,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } else goto out_free_skb; - status->flag |= RX_FLAG_INTERNAL_CMTR; return; out_free_skb: diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 10caec5ea8f..34da67995d9 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -377,7 +377,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_receive_skb(skb2); + netif_rx(skb2); } } @@ -386,7 +386,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (prev_dev) { skb->dev = prev_dev; - netif_receive_skb(skb); + netif_rx(skb); skb = NULL; } rcu_read_unlock(); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba4801..4c2f89df5cc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index f228a17ec64..7e9af5b76d9 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -45,6 +45,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> @@ -359,7 +360,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) { /* If mangling fails this function will return 0 * which will cause the packet to be dropped. * Mangling can only fail under memory pressure, @@ -409,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -496,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d..49df6bea6a2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. */ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a40419..8d9e4c949b9 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -48,15 +48,17 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; + size_t alloc_size; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len; + alloc_size = t->alloc_size; rcu_read_unlock(); - *ext = kzalloc(t->alloc_size, gfp); + *ext = kzalloc(alloc_size, gfp); if (!*ext) return NULL; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 53d892210a0..f64de954486 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1376,7 +1376,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, unsigned int msglen, origlen; const char *dptr, *end; s16 diff, tdiff = 0; - int ret; + int ret = NF_ACCEPT; typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 5490fc37c92..daab8c4a903 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -70,7 +70,11 @@ nf_tproxy_destructor(struct sk_buff *skb) int nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - if (inet_sk(sk)->transparent) { + bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_transparent : + inet_sk(sk)->transparent; + + if (transparent) { skb_orphan(skb); skb->sk = sk; skb->destructor = nf_tproxy_destructor; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8648a9922aa..cd96ed3ccee 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb, *frag __maybe_unused = NULL; + struct sk_buff *skb, *data_skb; int err; if (flags&MSG_OOB) @@ -1418,45 +1418,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; + data_skb = skb; + #ifdef CONFIG_COMPAT_NETLINK_MESSAGES if (unlikely(skb_shinfo(skb)->frag_list)) { - bool need_compat = !!(flags & MSG_CMSG_COMPAT); - /* - * If this skb has a frag_list, then here that means that - * we will have to use the frag_list skb for compat tasks - * and the regular skb for non-compat tasks. + * If this skb has a frag_list, then here that means that we + * will have to use the frag_list skb's data for compat tasks + * and the regular skb's data for normal (non-compat) tasks. * - * The skb might (and likely will) be cloned, so we can't - * just reset frag_list and go on with things -- we need to - * keep that. For the compat case that's easy -- simply get - * a reference to the compat skb and free the regular one - * including the frag. For the non-compat case, we need to - * avoid sending the frag to the user -- so assign NULL but - * restore it below before freeing the skb. + * If we need to send the compat skb, assign it to the + * 'data_skb' variable so that it will be used below for data + * copying. We keep 'skb' for everything else, including + * freeing both later. */ - if (need_compat) { - struct sk_buff *compskb = skb_shinfo(skb)->frag_list; - skb_get(compskb); - kfree_skb(skb); - skb = compskb; - } else { - frag = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = NULL; - } + if (flags & MSG_CMSG_COMPAT) + data_skb = skb_shinfo(skb)->frag_list; } #endif msg->msg_namelen = 0; - copied = skb->len; + copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } - skb_reset_transport_header(skb); - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + skb_reset_transport_header(data_skb); + err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); if (msg->msg_name) { struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; @@ -1476,11 +1466,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } siocb->scm->creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) - copied = skb->len; - -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES - skb_shinfo(skb)->frag_list = frag; -#endif + copied = data_skb->len; skb_free_datagram(sk, skb); @@ -2116,6 +2102,26 @@ static void __net_exit netlink_net_exit(struct net *net) #endif } +static void __init netlink_add_usersock_entry(void) +{ + unsigned long *listeners; + int groups = 32; + + listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), + GFP_KERNEL); + if (!listeners) + panic("netlink_add_usersock_entry: Cannot allocate listneres\n"); + + netlink_table_grab(); + + nl_table[NETLINK_USERSOCK].groups = groups; + nl_table[NETLINK_USERSOCK].listeners = listeners; + nl_table[NETLINK_USERSOCK].module = THIS_MODULE; + nl_table[NETLINK_USERSOCK].registered = 1; + + netlink_table_ungrab(); +} + static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, @@ -2164,6 +2170,8 @@ static int __init netlink_proto_init(void) hash->rehash_time = jiffies; } + netlink_add_usersock_entry(); + sock_register(&netlink_family_ops); register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b2a3ae6cad7..15003021f4f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -225,12 +225,13 @@ static void pipe_grant_credits(struct sock *sk) static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *hdr = pnp_hdr(skb); + struct pnpipehdr *hdr; int wake = 0; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; + hdr = pnp_hdr(skb); if (hdr->data[0] != PN_PEP_TYPE_COMMON) { LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", (unsigned)hdr->data[0]); diff --git a/net/rds/recv.c b/net/rds/recv.c index 795a00b7f2c..c93588c2d55 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -297,7 +297,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg; + struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index c397524c039..c519939e8da 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 975183fe695..27844f231d1 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (ready == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 1aba6878fa5..e4379740410 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) rdsdebug("data ready sk %p bytes %d\n", sk, bytes); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index a28b895ff0d..2f012a07d94 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { write_space = sk->sk_write_space; @@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8e45e76a95f..d952e7eac18 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -679,7 +679,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) { @@ -739,7 +739,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 537a48732e9..7ebf7439b47 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -350,22 +350,19 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = a->priv; - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - opt.refcnt = police->tcf_refcnt - ref; - opt.bindcnt = police->tcf_bindcnt - bind; + struct tc_police opt = { + .index = police->tcf_index, + .action = police->tcf_action, + .mtu = police->tcfp_mtu, + .burst = police->tcfp_burst, + .refcnt = police->tcf_refcnt - ref, + .bindcnt = police->tcf_bindcnt - bind, + }; + if (police->tcfp_R_tab) opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); if (police->tcfp_P_tab) opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); if (police->tcfp_result) NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 7416a5c73b2..b0c2a82178a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -137,7 +137,7 @@ next_knode: int toff = off + key->off + (off2 & key->offmask); __be32 *data, _data; - if (skb_headroom(skb) + toff < 0) + if (skb_headroom(skb) + toff > INT_MAX) goto out; data = skb_header_pointer(skb, toff, 4, &_data); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 34066278952..6318e1136b8 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -255,10 +255,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (!list_empty(&flow->list)) { - error = -EEXIST; - goto err_out; - } } else { int i; unsigned long cl; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index abd904be428..47496098d35 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -761,8 +761,8 @@ init_vf(struct hfsc_class *cl, unsigned int len) if (f != cl->cl_f) { cl->cl_f = f; cftree_update(cl); - update_cfmin(cl->cl_parent); } + update_cfmin(cl->cl_parent); } } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 86366390038..ddbbf7c81fa 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -543,16 +543,20 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) id = ntohs(hmacs->hmac_ids[i]); /* Check the id is in the supported range */ - if (id > SCTP_AUTH_HMAC_ID_MAX) + if (id > SCTP_AUTH_HMAC_ID_MAX) { + id = 0; continue; + } /* See is we support the id. Supported IDs have name and * length fields set, so that we can allocated and use * them. We can safely just check for name, for without the * name, we can't allocate the TFM. */ - if (!sctp_hmac_list[id].hmac_name) + if (!sctp_hmac_list[id].hmac_name) { + id = 0; continue; + } break; } diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5ac..bcc4590ccaf 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -92,7 +92,6 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - sctp_packet_reset(packet); packet->vtag = vtag; if (ecn_capable && sctp_packet_empty(packet)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd55563..d344dc481cc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1232,6 +1232,18 @@ out: return 0; } +static bool list_has_sctp_addr(const struct list_head *list, + union sctp_addr *ipaddr) +{ + struct sctp_transport *addr; + + list_for_each_entry(addr, list, transports) { + if (sctp_cmp_addr_exact(ipaddr, &addr->ipaddr)) + return true; + } + + return false; +} /* A restart is occurring, check to make sure no new addresses * are being added as we may be under a takeover attack. */ @@ -1240,10 +1252,10 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { - struct sctp_transport *new_addr, *addr; - int found; + struct sctp_transport *new_addr; + int ret = 1; - /* Implementor's Guide - Sectin 5.2.2 + /* Implementor's Guide - Section 5.2.2 * ... * Before responding the endpoint MUST check to see if the * unexpected INIT adds new addresses to the association. If new @@ -1254,31 +1266,19 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, /* Search through all current addresses and make sure * we aren't adding any new ones. */ - new_addr = NULL; - found = 0; - list_for_each_entry(new_addr, &new_asoc->peer.transport_addr_list, - transports) { - found = 0; - list_for_each_entry(addr, &asoc->peer.transport_addr_list, - transports) { - if (sctp_cmp_addr_exact(&new_addr->ipaddr, - &addr->ipaddr)) { - found = 1; - break; - } - } - if (!found) + transports) { + if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, + &new_addr->ipaddr)) { + sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + commands); + ret = 0; break; - } - - /* If a new address was added, ABORT the sender. */ - if (!found && new_addr) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, commands); + } } /* Return success if all addresses were found. */ - return found; + return ret; } /* Populate the verification/tie tags based on overlapping INIT diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca44917872d..fbb70770ad0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -916,6 +916,11 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, /* Walk through the addrs buffer and count the number of addresses. */ addr_buf = kaddrs; while (walk_size < addrs_size) { + if (walk_size + sizeof(sa_family_t) > addrs_size) { + kfree(kaddrs); + return -EINVAL; + } + sa_addr = (struct sockaddr *)addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); @@ -1002,9 +1007,13 @@ static int __sctp_connect(struct sock* sk, /* Walk through the addrs buffer and count the number of addresses. */ addr_buf = kaddrs; while (walk_size < addrs_size) { + if (walk_size + sizeof(sa_family_t) > addrs_size) { + err = -EINVAL; + goto out_free; + } + sa_addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); - port = ntohs(sa_addr->v4.sin_port); /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. @@ -1014,6 +1023,8 @@ static int __sctp_connect(struct sock* sk, goto out_free; } + port = ntohs(sa_addr->v4.sin_port); + /* Save current address so we can work with it */ memcpy(&to, sa_addr, af->sockaddr_len); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 36cb66022a2..e9eaaf7d43c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -38,7 +38,7 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; -#define MAX_HASHTABLE_BITS (10) +#define MAX_HASHTABLE_BITS (14) static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) { unsigned long num; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dcfc66bab2b..12c48598281 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -745,17 +745,18 @@ gss_pipe_release(struct inode *inode) struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *gss_msg; +restart: spin_lock(&inode->i_lock); - while (!list_empty(&rpci->in_downcall)) { + list_for_each_entry(gss_msg, &rpci->in_downcall, list) { - gss_msg = list_entry(rpci->in_downcall.next, - struct gss_upcall_msg, list); + if (!list_empty(&gss_msg->msg.list)) + continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&inode->i_lock); + goto restart; } spin_unlock(&inode->i_lock); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 03264461052..778e5dfc514 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -237,6 +237,7 @@ get_key(const void *p, const void *end, if (!supported_gss_krb5_enctype(alg)) { printk(KERN_WARNING "gss_kerberos_mech: unsupported " "encryption key algorithm %d\n", alg); + p = ERR_PTR(-EINVAL); goto out_err; } p = simple_get_netobj(p, end, &key); @@ -282,15 +283,19 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) ctx->enctype = ENCTYPE_DES_CBC_RAW; ctx->gk5e = get_gss_krb5_enctype(ctx->enctype); - if (ctx->gk5e == NULL) + if (ctx->gk5e == NULL) { + p = ERR_PTR(-EINVAL); goto out_err; + } /* The downcall format was designed before we completely understood * the uses of the context fields; so it includes some stuff we * just give some minimal sanity-checking, and some we ignore * completely (like the next twenty bytes): */ - if (unlikely(p + 20 > end || p + 20 < p)) + if (unlikely(p + 20 > end || p + 20 < p)) { + p = ERR_PTR(-EFAULT); goto out_err; + } p += 20; p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) @@ -619,6 +624,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, if (ctx->seq_send64 != ctx->seq_send) { dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__, (long unsigned)ctx->seq_send64, ctx->seq_send); + p = ERR_PTR(-EINVAL); goto out_err; } p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index dc3f1f5ed86..adade3d313f 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -100,6 +100,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, if (version != 1) { dprintk("RPC: unknown spkm3 token format: " "obsolete nfs-utils?\n"); + p = ERR_PTR(-EINVAL); goto out_err_free_ctx; } @@ -135,8 +136,10 @@ gss_import_sec_context_spkm3(const void *p, size_t len, if (IS_ERR(p)) goto out_err_free_intg_alg; - if (p != end) + if (p != end) { + p = ERR_PTR(-EFAULT); goto out_err_free_intg_key; + } ctx_id->internal_ctx_id = ctx; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2388d83b68f..fa5549079d7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct kref *kref) * release remaining GSS contexts. This mechanism ensures * that it can do so safely. */ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ @@ -931,7 +929,7 @@ call_reserveresult(struct rpc_task *task) task->tk_status = 0; if (status >= 0) { if (task->tk_rqstp) { - task->tk_action = call_allocate; + task->tk_action = call_refresh; return; } @@ -966,13 +964,54 @@ call_reserveresult(struct rpc_task *task) } /* - * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. + * 2. Bind and/or refresh the credentials + */ +static void +call_refresh(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_action = call_refreshresult; + task->tk_status = 0; + task->tk_client->cl_stats->rpcauthrefresh++; + rpcauth_refreshcred(task); +} + +/* + * 2a. Process the results of a credential refresh + */ +static void +call_refreshresult(struct rpc_task *task) +{ + int status = task->tk_status; + + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_allocate; + if (status >= 0 && rpcauth_uptodatecred(task)) + return; + switch (status) { + case -EACCES: + rpc_exit(task, -EACCES); + return; + case -ENOMEM: + rpc_exit(task, -ENOMEM); + return; + case -ETIMEDOUT: + rpc_delay(task, 3*HZ); + } + task->tk_action = call_refresh; +} + +/* + * 2b. Allocate the buffer. For details, see sched.c:rpc_malloc. * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_client->cl_auth->au_cslack; + unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -980,7 +1019,7 @@ call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_refresh; + task->tk_action = call_bind; if (req->rq_buffer) return; @@ -1017,47 +1056,6 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } -/* - * 2a. Bind and/or refresh the credentials - */ -static void -call_refresh(struct rpc_task *task) -{ - dprint_status(task); - - task->tk_action = call_refreshresult; - task->tk_status = 0; - task->tk_client->cl_stats->rpcauthrefresh++; - rpcauth_refreshcred(task); -} - -/* - * 2b. Process the results of a credential refresh - */ -static void -call_refreshresult(struct rpc_task *task) -{ - int status = task->tk_status; - - dprint_status(task); - - task->tk_status = 0; - task->tk_action = call_bind; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; - switch (status) { - case -EACCES: - rpc_exit(task, -EACCES); - return; - case -ENOMEM: - rpc_exit(task, -ENOMEM); - return; - case -ETIMEDOUT: - rpc_delay(task, 3*HZ); - } - task->tk_action = call_refresh; -} - static inline int rpc_task_need_encode(struct rpc_task *task) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 95ccbcf45d3..8c8eef2b8f2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -48,7 +48,7 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, return; do { msg = list_entry(head->next, struct rpc_pipe_msg, list); - list_del(&msg->list); + list_del_init(&msg->list); msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); @@ -208,7 +208,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) if (msg != NULL) { spin_lock(&inode->i_lock); msg->errno = -EAGAIN; - list_del(&msg->list); + list_del_init(&msg->list); spin_unlock(&inode->i_lock); rpci->ops->destroy_msg(msg); } @@ -268,7 +268,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; spin_lock(&inode->i_lock); - list_del(&msg->list); + list_del_init(&msg->list); spin_unlock(&inode->i_lock); rpci->ops->destroy_msg(msg); } @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b6309db5622..fe9306bf10c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -800,7 +800,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) u32 _xid; __be32 *xp; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); if (!(xprt = xprt_from_sock(sk))) goto out; @@ -852,7 +852,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) dropit: skb_free_datagram(sk, skb); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -1229,7 +1229,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) dprintk("RPC: xs_tcp_data_ready...\n"); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; if (xprt->shutdown) @@ -1248,7 +1248,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); } while (read > 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* @@ -1301,7 +1301,7 @@ static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); @@ -1313,7 +1313,7 @@ static void xs_tcp_state_change(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1327,7 +1327,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt_wake_pending_tasks(xprt, -EAGAIN); } - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ @@ -1365,7 +1365,7 @@ static void xs_tcp_state_change(struct sock *sk) xs_sock_mark_closed(xprt); } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1376,7 +1376,7 @@ static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" @@ -1384,7 +1384,7 @@ static void xs_error_report(struct sock *sk) __func__, xprt, sk->sk_err); xprt_wake_pending_tasks(xprt, -EAGAIN); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_write_space(struct sock *sk) @@ -1416,13 +1416,13 @@ static void xs_write_space(struct sock *sk) */ static void xs_udp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/sock.c:sock_def_write_space */ if (sock_writeable(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1437,13 +1437,13 @@ static void xs_udp_write_space(struct sock *sk) */ static void xs_tcp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/stream.c:sk_stream_write_space */ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4414a18c63b..0b39b2451ea 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -692,6 +692,7 @@ static int unix_autobind(struct socket *sock) static u32 ordernum = 1; struct unix_address *addr; int err; + unsigned int retries = 0; mutex_lock(&u->readlock); @@ -717,9 +718,17 @@ retry: if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); - /* Sanity yield. It is unusual case, but yet... */ - if (!(ordernum&0xFF)) - yield(); + /* + * __unix_find_socket_byname() may take long time if many names + * are already in use. + */ + cond_resched(); + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { + err = -ENOSPC; + kfree(addr); + goto out; + } goto retry; } addr->hash ^= sk->sk_type; diff --git a/net/wireless/core.c b/net/wireless/core.c index 541e2fff5e9..d6d046b9f6f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -475,12 +475,10 @@ int wiphy_register(struct wiphy *wiphy) mutex_lock(&cfg80211_mutex); res = device_add(&rdev->wiphy.dev); - if (res) - goto out_unlock; - - res = rfkill_register(rdev->rfkill); - if (res) - goto out_rm_dev; + if (res) { + mutex_unlock(&cfg80211_mutex); + return res; + } /* set up regulatory info */ wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); @@ -509,13 +507,18 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); mutex_unlock(&cfg80211_mutex); + /* + * due to a locking dependency this has to be outside of the + * cfg80211_mutex lock + */ + res = rfkill_register(rdev->rfkill); + if (res) + goto out_rm_dev; + return 0; out_rm_dev: device_del(&rdev->wiphy.dev); - -out_unlock: - mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index bb5e0a5ecfa..7e5c3a45f81 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1420,6 +1420,9 @@ int cfg80211_wext_giwessid(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; + data->flags = 0; + data->length = 0; + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 0ef17bc42ba..8f5116f5af1 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -782,6 +782,22 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } } + if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) { + /* + * If this is a GET, but not NOMAX, it means that the extra + * data is not bounded by userspace, but by max_tokens. Thus + * set the length to max_tokens. This matches the extra data + * allocation. + * The driver should fill it with the number of tokens it + * provided, and it may check iwp->length rather than having + * knowledge of max_tokens. If the driver doesn't change the + * iwp->length, this ioctl just copies back max_token tokens + * filled with zeroes. Hopefully the driver isn't claiming + * them to be valid data. + */ + iwp->length = descr->max_tokens; + } + err = handler(dev, info, (union iwreq_data *) iwp, extra); iwp->length += essid_compat; diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 3feb28e41c5..674d426a9d2 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -152,7 +152,7 @@ static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, } else if (!iwp->pointer) return -EFAULT; - extra = kmalloc(extra_size, GFP_KERNEL); + extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) return -ENOMEM; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a3cca0a9434..64f2ae1fdc1 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,7 +101,7 @@ resume: err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set_noref(skb, dst); + skb_dst_set(skb, dst_clone(dst)); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b3ed7ad493..cbab6e1a8c9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(net, &tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5208b12fbfb..eb96ce52f17 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } @@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int error = 0; struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } if (best) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } @@ -829,7 +838,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + tmpl->id.proto, encap_family)) != NULL) { to_put = x0; error = -EEXIST; goto out; @@ -839,9 +848,9 @@ found: error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); @@ -856,10 +865,10 @@ found: x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(net, daddr, saddr, family); + h = xfrm_src_hash(net, daddr, saddr, encap_family); hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b14ed4b1f27..8bae6b22c84 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1801,7 +1801,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m);; + u32 mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); |