From 85dfb745ee40232876663ae206cba35f24ab2a40 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 18 Feb 2013 16:24:20 +0100 Subject: af_key: initialize satype in key_notify_policy_flush() This field was left uninitialized. Some user daemons perform check against this field. Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9ef79851f29..d5a4a796f02 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2694,6 +2694,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; -- cgit v1.2.3-70-g09d2 From 27a737ff7cb062fb9cbceba9b44d60aa74862bfa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Mar 2013 23:17:08 +0100 Subject: mac80211: always synchronize_net() during station removal If there are keys left during station removal, then a synchronize_net() will be done (for each key, I have a patch to address this for 3.10), otherwise it won't be done at all which causes issues because the station could be used for TX while it's being removed from the driver -- that might confuse the driver. Fix this by always doing synchronize_net() if no key was present any more. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a79ce820cb5..238a0cca320 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -766,6 +766,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; + bool have_key = false; might_sleep(); @@ -793,12 +794,19 @@ int __must_check __sta_info_destroy(struct sta_info *sta) list_del_rcu(&sta->list); mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); - if (sta->ptk) + have_key = true; + } + if (sta->ptk) { __ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); + have_key = true; + } mutex_unlock(&local->key_mtx); + if (!have_key) + synchronize_net(); + sta->dead = true; local->num_sta--; -- cgit v1.2.3-70-g09d2 From 021fcdc13acbab78589325ae2db0b384b4ee7222 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 7 Mar 2013 11:08:29 +0200 Subject: cfg80211: fix inconsistency in trace for rdev_set_mac_acl There is NETDEV_ENTRY that was incorrectly assigned as WIPHY_ASSIGN, fix it. Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- net/wireless/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b7a531380e1..6847d043ede 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1778,7 +1778,7 @@ TRACE_EVENT(rdev_set_mac_acl, ), TP_fast_assign( WIPHY_ASSIGN; - WIPHY_ASSIGN; + NETDEV_ASSIGN; __entry->acl_policy = params->acl_policy; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", -- cgit v1.2.3-70-g09d2 From 1345ee6a6d90813f972379fad8b75f17026fc8b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Mar 2013 10:31:05 +0100 Subject: cfg80211: fix potential BSS memory leak and update In the odd case that while updating information from a beacon, a BSS was found that is part of a hidden group, we drop the new information. In this case, however, we leak the IE buffer from the update, and erroneously update the entry's timestamp so it will never time out. Fix both these issues. Cc: Larry Finger Signed-off-by: Johannes Berg --- net/wireless/scan.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 674aadca007..e93bd31d23b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -698,11 +698,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { - found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - /* Update IEs */ if (rcu_access_pointer(tmp->pub.proberesp_ies)) { const struct cfg80211_bss_ies *old; @@ -723,6 +718,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (found->pub.hidden_beacon_bss && !list_empty(&found->hidden_list)) { + const struct cfg80211_bss_ies *f; + /* * The found BSS struct is one of the probe * response members of a group, but we're @@ -732,6 +729,10 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * SSID to showing it, which is confusing so * drop this information. */ + + f = rcu_access_pointer(tmp->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, + rcu_head); goto drop; } @@ -761,6 +762,11 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } + + found->pub.beacon_interval = tmp->pub.beacon_interval; + found->pub.signal = tmp->pub.signal; + found->pub.capability = tmp->pub.capability; + found->ts = tmp->ts; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; -- cgit v1.2.3-70-g09d2 From 07e5a5f5ab7474589c15fc5d88e4f7fc43979530 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Mar 2013 13:22:05 +0100 Subject: mac80211: fix crash with P2P Device returning action frames If a P2P Device interface receives an unhandled action frame, we attempt to return it. This crashes because it doesn't have a channel context. Fix the crash by using status->band and properly mark the return frame as an off-channel frame. Reported-by: Ilan Peer Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bb73ed2d20b..c6844ad080b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2675,7 +2675,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) memset(nskb->cb, 0, sizeof(nskb->cb)); - ieee80211_tx_skb(rx->sdata, nskb); + if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb); + + info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK | + IEEE80211_TX_CTL_NO_CCK_RATE; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = + local->hw.offchannel_tx_hw_queue; + } + + __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; -- cgit v1.2.3-70-g09d2 From eb20ff9c91ddcb2d55c1849a87d3db85af5e88a9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Mar 2013 19:46:20 -0300 Subject: Bluetooth: Fix not closing SCO sockets in the BT_CONNECT2 state With deferred setup for SCO, it is possible that userspace closes the socket when it is in the BT_CONNECT2 state, after the Connect Request is received but before the Accept Synchonous Connection is sent. If this happens the following crash was observed, when the connection is terminated: [ +0.000003] hci_sync_conn_complete_evt: hci0 status 0x10 [ +0.000005] sco_connect_cfm: hcon ffff88003d1bd800 bdaddr 40:98:4e:32:d7:39 status 16 [ +0.000003] sco_conn_del: hcon ffff88003d1bd800 conn ffff88003cc8e300, err 110 [ +0.000015] BUG: unable to handle kernel NULL pointer dereference at 0000000000000199 [ +0.000906] IP: [] __lock_acquire+0xed/0xe82 [ +0.000000] PGD 3d21f067 PUD 3d291067 PMD 0 [ +0.000000] Oops: 0002 [#1] SMP [ +0.000000] Modules linked in: rfcomm bnep btusb bluetooth [ +0.000000] CPU 0 [ +0.000000] Pid: 1481, comm: kworker/u:2H Not tainted 3.9.0-rc1-25019-gad82cdd #1 Bochs Bochs [ +0.000000] RIP: 0010:[] [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP: 0018:ffff88003c3c19d8 EFLAGS: 00010002 [ +0.000000] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000000000 [ +0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003d1be868 [ +0.000000] RBP: ffff88003c3c1a98 R08: 0000000000000002 R09: 0000000000000000 [ +0.000000] R10: ffff88003d1be868 R11: ffff88003e20b000 R12: 0000000000000002 [ +0.000000] R13: ffff88003aaa8000 R14: 000000000000006e R15: ffff88003d1be850 [ +0.000000] FS: 0000000000000000(0000) GS:ffff88003e200000(0000) knlGS:0000000000000000 [ +0.000000] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ +0.000000] CR2: 0000000000000199 CR3: 000000003c1cb000 CR4: 00000000000006b0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000000] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ +0.000000] Process kworker/u:2H (pid: 1481, threadinfo ffff88003c3c0000, task ffff88003aaa8000) [ +0.000000] Stack: [ +0.000000] ffffffff81b16342 0000000000000000 0000000000000000 ffff88003d1be868 [ +0.000000] ffffffff00000000 00018c0c7863e367 000000003c3c1a28 ffffffff8101efbd [ +0.000000] 0000000000000000 ffff88003e3d2400 ffff88003c3c1a38 ffffffff81007c7a [ +0.000000] Call Trace: [ +0.000000] [] ? kvm_clock_read+0x34/0x3b [ +0.000000] [] ? paravirt_sched_clock+0x9/0xd [ +0.000000] [] ? sched_clock+0x9/0xb [ +0.000000] [] ? sched_clock_local+0x12/0x75 [ +0.000000] [] lock_acquire+0x93/0xb1 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? lock_release_holdtime.part.22+0x4e/0x55 [ +0.000000] [] _raw_spin_lock+0x40/0x74 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? _raw_spin_unlock+0x23/0x36 [ +0.000000] [] spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] sco_conn_del+0x76/0xbb [bluetooth] [ +0.000000] [] sco_connect_cfm+0x2da/0x2e9 [bluetooth] [ +0.000000] [] hci_proto_connect_cfm+0x38/0x65 [bluetooth] [ +0.000000] [] hci_sync_conn_complete_evt.isra.79+0x11a/0x13e [bluetooth] [ +0.000000] [] hci_event_packet+0x153b/0x239d [bluetooth] [ +0.000000] [] ? _raw_spin_unlock_irqrestore+0x48/0x5c [ +0.000000] [] hci_rx_work+0xf3/0x2e3 [bluetooth] [ +0.000000] [] process_one_work+0x1dc/0x30b [ +0.000000] [] ? process_one_work+0x172/0x30b [ +0.000000] [] ? spin_lock_irq+0x9/0xb [ +0.000000] [] worker_thread+0x123/0x1d2 [ +0.000000] [] ? manage_workers+0x240/0x240 [ +0.000000] [] kthread+0x9d/0xa5 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] [] ret_from_fork+0x7c/0xb0 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] Code: d7 44 89 8d 50 ff ff ff 4c 89 95 58 ff ff ff e8 44 fc ff ff 44 8b 8d 50 ff ff ff 48 85 c0 4c 8b 95 58 ff ff ff 0f 84 7a 04 00 00 ff 80 98 01 00 00 83 3d 25 41 a7 00 00 45 8b b5 e8 05 00 00 [ +0.000000] RIP [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP [ +0.000000] CR2: 0000000000000199 [ +0.000000] ---[ end trace e73cd3b52352dd34 ]--- Cc: stable@vger.kernel.org [3.8] Signed-off-by: Vinicius Costa Gomes Tested-by: Frederic Dalleau Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e3..aaf1957bc4f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -361,6 +361,7 @@ static void __sco_sock_close(struct sock *sk) sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.2.3-70-g09d2 From 0d98da5d845e0d0293055913ce65c9904b3b902a Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 7 Mar 2013 17:20:46 +0000 Subject: netfilter: nf_conntrack: register pernet subsystem before register L4 proto In (c296bb4 netfilter: nf_conntrack: refactor l4proto support for netns) the l4proto gre/dccp/udplite/sctp registration happened before the pernet subsystem, which is wrong. Register pernet subsystem before register L4proto since after register L4proto, init_conntrack may try to access the resources which allocated in register_pernet_subsys. Reported-by: Alexey Dobriyan Cc: Alexey Dobriyan Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_gre.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_sctp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_udplite.c | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 432f9578000..ba65b2041eb 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -969,6 +969,10 @@ static int __init nf_conntrack_proto_dccp_init(void) { int ret; + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&dccp_proto4); if (ret < 0) goto out_dccp4; @@ -977,16 +981,12 @@ static int __init nf_conntrack_proto_dccp_init(void) if (ret < 0) goto out_dccp6; - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&dccp_proto6); out_dccp6: nf_ct_l4proto_unregister(&dccp_proto4); out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bd7d01d9c7e..155ce9f8a0d 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -420,18 +420,18 @@ static int __init nf_ct_proto_gre_init(void) { int ret; - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 480f616d593..ec83536def9 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -888,6 +888,10 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret < 0) goto out_sctp4; @@ -896,16 +900,12 @@ static int __init nf_conntrack_proto_sctp_init(void) if (ret < 0) goto out_sctp6; - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); out_sctp6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 157489581c3..ca969f6273f 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void) { int ret; + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); if (ret < 0) goto out_udplite4; @@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void) if (ret < 0) goto out_udplite6; - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); out_udplite6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: return ret; } -- cgit v1.2.3-70-g09d2 From bae99f7a1d372374aaf9ed8910f3b825da995b36 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2013 06:03:18 +0000 Subject: netfilter: nfnetlink_queue: fix incorrect initialization of copy range field 2^16 = 0xffff, not 0xfffff (note the extra 'f'). Not dangerous since you adjust it to min_t(data_len, skb->len) just after on. Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 858fd52c104..1cb48540f86 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -112,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); -- cgit v1.2.3-70-g09d2 From a82783c91d5dce680dbd290ebf301a520b0e72a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Mar 2013 20:11:01 +0000 Subject: netfilter: ip6t_NPT: restrict to mangle table As the translation is stateless, using it in nat table doesn't work (only initial packet is translated). filter table OUTPUT works but won't re-route the packet after translation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc1405a1..33608c61027 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -114,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, -- cgit v1.2.3-70-g09d2 From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Mar 2013 01:03:33 -0700 Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids. Don't allow spoofing pids over unix domain sockets in the corner cases where a user has created a user namespace but has not yet created a pid namespace. Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- net/core/scm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e..2dc6cdaaae8 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || -- cgit v1.2.3-70-g09d2 From cf2e39429c245245db889fffdfbdf3f889a6cb22 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:06 +0200 Subject: ipvs: fix sctp chunk length order Fix wrong but non-fatal access to chunk length. sch->length should be in network order, next chunk should be aligned to 4 bytes. Problem noticed in sparse output. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f2768..cd1d7298f7b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } -- cgit v1.2.3-70-g09d2 From 0c12582fbcdea0cbb0dfd224e1c5f9a8428ffa18 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:04 +0200 Subject: ipvs: add backup_only flag to avoid loops Dmitry Akindinov is reporting for a problem where SYNs are looping between the master and backup server when the backup server is used as real server in DR mode and has IPVS rules to function as director. Even when the backup function is enabled we continue to forward traffic and schedule new connections when the current master is using the backup server as real server. While this is not a problem for NAT, for DR and TUN method the backup server can not determine if a request comes from client or from director. To avoid such loops add new sysctl flag backup_only. It can be needed for DR/TUN setups that do not need backup and director function at the same time. When the backup function is enabled we stop any forwarding and pass the traffic to the local stack (real server mode). The flag disables the director function when the backup function is enabled. For setups that enable backup function for some virtual services and director function for other virtual services there should be another more complex solution to support DR/TUN mode, may be to assign per-virtual service syncid value, so that we can differentiate the requests. Reported-by: Dmitry Akindinov Tested-by: German Myzovsky Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- Documentation/networking/ipvs-sysctl.txt | 7 +++++++ include/net/ip_vs.h | 12 ++++++++++++ net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index f2a2488f1bf..9573d0c48c6 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -15,6 +15,13 @@ amemthresh - INTEGER enabled and the variable is automatically set to 2, otherwise the strategy is disabled and the variable is set to 1. +backup_only - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + If set, disable the director function while the server is + in backup mode to avoid packet loops for DR/TUN methods. + conntrack - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d39..fce8e6b66d5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -976,6 +976,7 @@ struct netns_ipvs { int sysctl_sync_retries; int sysctl_nat_icmp_send; int sysctl_pmtu_disc; + int sysctl_backup_only; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1067,6 +1068,12 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return ipvs->sysctl_pmtu_disc; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return ipvs->sync_state & IP_VS_STATE_BACKUP && + ipvs->sysctl_backup_only; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1114,6 +1121,11 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a5..18b4bc55fa3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1577,7 +1577,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1655,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1815,13 +1815,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1837,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1846,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c68198bf912..9e2d1cccd1e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); -- cgit v1.2.3-70-g09d2 From bf93ad72cd8cfabe66a7b3d66236a1266d357189 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:05 +0200 Subject: ipvs: remove extra rcu lock In 3.7 we added code that uses ipv4_update_pmtu but after commit c5ae7d4192 (ipv4: must use rcu protection while calling fib_lookup) the RCU lock is not needed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 18b4bc55fa3..61f49d24171 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; -- cgit v1.2.3-70-g09d2 From 3dd6664fac7e6041bfc8756ae9e8c78f59108cd9 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 13:09:59 +0000 Subject: netfilter: remove unused "config IP_NF_QUEUE" Kconfig symbol IP_NF_QUEUE is unused since commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 ("netfilter: remove ip_queue support"). Let's remove it too. Signed-off-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ce2d43e1f09..0d755c50994 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3-70-g09d2 From bec964ed3b2549086e1fdfbf7f1ce8c22f89baa4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 20 Mar 2013 10:50:50 +0100 Subject: NFC: llcp: Detach socket from process context only when releasing the socket Calling sock_orphan when e.g. the NFC adapter is removed can lead to kernel crashes when e.g. a connection less client is sleeping on the Rx workqueue, waiting for data to show up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 6 ------ net/nfc/llcp/sock.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index b530afadd76..336813ff8cd 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -107,8 +107,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); - - sock_orphan(accept_sk); } if (listen == true) { @@ -134,8 +132,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } @@ -164,8 +160,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5c7cdf3f2a8..e488e440186 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -464,8 +464,6 @@ static int llcp_sock_release(struct socket *sock) nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); - - sock_orphan(accept_sk); } } -- cgit v1.2.3-70-g09d2 From 0017c0b57500606aab894cdb02fdf3380ddd60ee Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 11 Mar 2013 10:32:16 +0100 Subject: xfrm: Fix replay notification for esn. We may miscalculate the sequence number difference from the last time we send a notification if a sequence number wrap occured in the meantime. We fix this by adding a separate replay notify function for esn. Here we take the high bits of the sequence number into account to calculate the difference. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 35754cc8a9e..a3906737f49 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -334,6 +334,72 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = UINT_MAX - preplay_esn->seq + + replay_esn->seq; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = UINT_MAX - preplay_esn->oseq + + replay_esn->oseq; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -510,7 +576,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; -- cgit v1.2.3-70-g09d2 From b3155155440190de78fb501953136a6c5b82e327 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 20 Mar 2013 16:00:16 +0100 Subject: NFC: llcp: Remove possible double call to kfree_skb kfree_skb was called twice when the socket receive queue is full Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 336813ff8cd..ee25f25f0cd 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -821,7 +821,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } nfc_llcp_sock_put(llcp_sock); @@ -1022,7 +1021,6 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } } -- cgit v1.2.3-70-g09d2 From f1e79e208076ffe7bad97158275f1c572c04f5c7 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 19 Mar 2013 01:47:27 +0000 Subject: genetlink: trigger BUG_ON if a group name is too long Trigger BUG_ON if a group name is longer than GENL_NAMSIZ. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6f410..5a55be3f17a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); -- cgit v1.2.3-70-g09d2 From 44046a593eb770dbecdabf1c82bcd252f2a8337b Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:12 +0000 Subject: udp: add encap_destroy callback Users of udp encapsulation currently have an encap_rcv callback which they can use to hook into the udp receive path. In situations where a encapsulation user allocates resources associated with a udp encap socket, it may be convenient to be able to also hook the proto .destroy operation. For example, if an encap user holds a reference to the udp socket, the destroy hook might be used to relinquish this reference. This patch adds a socket destroy hook into udp, which is set and enabled in the same way as the existing encap_rcv hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 1 + net/ipv4/udp.c | 7 +++++++ net/ipv6/udp.c | 8 ++++++++ 3 files changed, 16 insertions(+) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 9d81de123c9..42278bbf7a8 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -68,6 +68,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963..0a073a26372 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 599e1ba6d1c..d8e5e852fc7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1285,10 +1285,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.3-70-g09d2 From 9980d001cec86c3c75f3a6008ddb73c397ea3b3e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:13 +0000 Subject: l2tp: add udp encap socket destroy handler L2TP sessions hold a reference to the tunnel socket to prevent it going away while sessions are still active. However, since tunnel destruction is handled by the sock sk_destruct callback there is a catch-22: a tunnel with sessions cannot be deleted since each session holds a reference to the tunnel socket. If userspace closes a managed tunnel socket, or dies, the tunnel will persist and it will be neccessary to individually delete the sessions using netlink commands. This is ugly. To prevent this occuring, this patch leverages the udp encapsulation socket destroy callback to gain early notification when the tunnel socket is closed. This allows us to safely close the sessions running in the tunnel, dropping the tunnel socket references in the process. The tunnel socket is then destroyed as normal, and the tunnel resources deallocated in sk_destruct. While we're at it, ensure that l2tp_tunnel_closeall correctly drops session references to allow the sessions to be deleted rather than leaking. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d36875f3427..ee726a75229 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1282,6 +1282,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* No longer an encapsulation socket. See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; @@ -1360,6 +1361,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1373,6 +1376,16 @@ again: write_unlock_bh(&tunnel->hlist_lock); } +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} + /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. */ @@ -1668,6 +1681,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); -- cgit v1.2.3-70-g09d2 From e34f4c7050e5471b6d4fb25380713937fc837514 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:14 +0000 Subject: l2tp: export l2tp_tunnel_closeall l2tp_core internally uses l2tp_tunnel_closeall to close all sessions in a tunnel when a UDP-encapsulation socket is destroyed. We need to do something similar for IP-encapsulation sockets. Export l2tp_tunnel_closeall as a GPL symbol to enable l2tp_ip and l2tp_ip6 to call it from their .destroy handlers. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ee726a75229..287e327342d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -114,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -1312,7 +1311,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. */ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1375,6 +1374,7 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8eb8f1d47f3..b0861f68a10 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -240,6 +240,7 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); extern int l2tp_session_delete(struct l2tp_session *session); -- cgit v1.2.3-70-g09d2 From 936063175afd895913a5e9db77e1a0ef43ea44ea Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:15 +0000 Subject: l2tp: close sessions in ip socket destroy callback l2tp_core hooks UDP's .destroy handler to gain advance warning of a tunnel socket being closed from userspace. We need to do the same thing for IP-encapsulation sockets. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 6 ++++++ net/l2tp/l2tp_ip6.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7f41b705126..571db8dd229 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 41f2f8126eb..c74f5a91ff6 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.3-70-g09d2 From 2b551c6e7d5bca2c78c216b15ef675653d4f459a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:16 +0000 Subject: l2tp: close sessions before initiating tunnel delete When a user deletes a tunnel using netlink, all the sessions in the tunnel should also be deleted. Since running sessions will pin the tunnel socket with the references they hold, have the l2tp_tunnel_delete close all sessions in a tunnel before finally closing the tunnel socket. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 287e327342d..0dd50c079f2 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_closeall(tunnel); return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -- cgit v1.2.3-70-g09d2 From 8abbbe8ff572fd84d1b98eb9acf30611a97cf72e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:17 +0000 Subject: l2tp: take a reference for kernel sockets in l2tp_tunnel_sock_lookup When looking up the tunnel socket in struct l2tp_tunnel, hold a reference whether the socket was created by the kernel or by userspace. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0dd50c079f2..45373fee38c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -191,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -209,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); -- cgit v1.2.3-70-g09d2 From 02d13ed5f94af38c37d1abd53462fe48d78bcc9d Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:18 +0000 Subject: l2tp: don't BUG_ON sk_socket being NULL It is valid for an existing struct sock object to have a NULL sk_socket pointer, so don't BUG_ON in l2tp_tunnel_del_work if that should occur. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 45373fee38c..e841ef2a68a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1412,19 +1412,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work) return; sock = sk->sk_socket; - BUG_ON(!sock); - /* If the tunnel socket was created directly by the kernel, use the - * sk_* API to release the socket now. Otherwise go through the - * inet_* layer to shut the socket down, and let userspace close it. + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. * In either case the tunnel resources are freed in the socket * destructor when the tunnel socket goes away. */ - if (sock->file == NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); } else { - inet_shutdown(sock, 2); + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); } l2tp_tunnel_sock_put(sk); -- cgit v1.2.3-70-g09d2 From 48f72f92b31431c40279b0fba6c5588e07e67d95 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:19 +0000 Subject: l2tp: add session reorder queue purge function to core If an l2tp session is deleted, it is necessary to delete skbs in-flight on the session's reorder queue before taking it down. Rather than having each pseudowire implementation reaching into the l2tp_session struct to handle this itself, provide a function in l2tp_core to purge the session queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 17 +++++++++++++++++ net/l2tp/l2tp_core.h | 1 + 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e841ef2a68a..69c316dd02d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -829,6 +829,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index b0861f68a10..d40713d105f 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -246,6 +246,7 @@ extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunne extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); -- cgit v1.2.3-70-g09d2 From 4c6e2fd35460208596fa099ee0750a4b0438aa5c Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:20 +0000 Subject: l2tp: purge session reorder queue on delete Add calls to l2tp_session_queue_purge as a part of l2tp_tunnel_closeall and l2tp_session_delete. Pseudowire implementations which are deleted only via. l2tp_core l2tp_session_delete calls can dispense with their own code for flushing the reorder queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 69c316dd02d..c00f31b8cc0 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1373,6 +1373,8 @@ again: synchronize_rcu(); } + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); @@ -1813,6 +1815,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_free); */ int l2tp_session_delete(struct l2tp_session *session) { + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); -- cgit v1.2.3-70-g09d2 From cf2f5c886a209377daefd5d2ba0bcd49c3887813 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:21 +0000 Subject: l2tp: push all ppp pseudowire shutdown through .release handler If userspace deletes a ppp pseudowire using the netlink API, either by directly deleting the session or by deleting the tunnel that contains the session, we need to tear down the corresponding pppox channel. Rather than trying to manage two pppox unbind codepaths, switch the netlink and l2tp_core session_close handlers to close via. the l2tp_ppp socket .release handler. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 53 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6a53371dba1..7e3e16aefcb 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -525,16 +508,12 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -880,18 +859,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. @@ -1839,7 +1806,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ -- cgit v1.2.3-70-g09d2 From 7b7c0719cd7afee725b920d75ec6a500b76107e6 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:22 +0000 Subject: l2tp: avoid deadlock in l2tp stats update l2tp's u64_stats writers were incorrectly synchronised, making it possible to deadlock a 64bit machine running a 32bit kernel simply by sending the l2tp code netlink commands while passing data through l2tp sessions. Previous discussion on netdev determined that alternative solutions such as spinlock writer synchronisation or per-cpu data would bring unjustified overhead, given that most users interested in high volume traffic will likely be running 64bit kernels on 64bit hardware. As such, this patch replaces l2tp's use of u64_stats with atomic_long_t, thereby avoiding the deadlock. Ref: http://marc.info/?l=linux-netdev&m=134029167910731&w=2 http://marc.info/?l=linux-netdev&m=134079868111131&w=2 Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 +++++++++++++------------------------------------ net/l2tp/l2tp_core.h | 19 ++++++------- net/l2tp/l2tp_debugfs.c | 28 +++++++++--------- net/l2tp/l2tp_netlink.c | 72 ++++++++++++++++++----------------------------- net/l2tp/l2tp_ppp.c | 46 +++++++++++++++--------------- 5 files changed, 93 insertions(+), 147 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c00f31b8cc0..97d30ac67c8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -385,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -404,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -451,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -459,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -624,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -641,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -712,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -733,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -789,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -817,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -861,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -950,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1080,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1109,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d40713d105f..519b013f8b3 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index c3813bc8455..072d7202e18 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c1bab22db85..0825ff26e11 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7e3e16aefcb..9d0eb8c1353 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -260,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. */ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -992,14 +992,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1633,14 +1633,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1675,14 +1675,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); -- cgit v1.2.3-70-g09d2 From f6e16b299bacaa71c6604a784f2d088a966f8c23 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:23 +0000 Subject: l2tp: unhash l2tp sessions on delete, not on free If we postpone unhashing of l2tp sessions until the structure is freed, we risk: 1. further packets arriving and getting queued while the pseudowire is being closed down 2. the recv path hitting "scheduling while atomic" errors in the case that recv drops the last reference to a session and calls l2tp_session_free while in atomic context As such, l2tp sessions should be unhashed from l2tp_core data structures early in the teardown process prior to calling pseudowire close. For pseudowires like l2tp_ppp which have multiple shutdown codepaths, provide an unhash hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 ++++++++++++++++++++++++---------------------------- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ppp.c | 12 +++------ 3 files changed, 38 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 97d30ac67c8..8aecf5df665 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1316,26 +1316,12 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } - + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) @@ -1732,64 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. + */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; - /* Delete the session from the hash */ + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->ref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 519b013f8b3..485a490fd99 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -242,6 +242,7 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 9d0eb8c1353..637a341c1e2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -466,19 +466,12 @@ static void pppol2tp_session_close(struct l2tp_session *session) */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -509,6 +502,7 @@ static int pppol2tp_release(struct socket *sock) /* Purge any queued data */ if (session != NULL) { + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); sock_put(sk); } -- cgit v1.2.3-70-g09d2 From 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:29 +0000 Subject: flow_keys: include thoff into flow_keys for later usage In skb_flow_dissect(), we perform a dissection of a skbuff. Since we're doing the work here anyway, also store thoff for a later usage, e.g. in the BPF filter. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/flow_keys.h | 1 + net/core/flow_dissector.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 80461c1ae9e..bb8271d487b 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -9,6 +9,7 @@ struct flow_keys { __be32 ports; __be16 port16[2]; }; + u16 thoff; u8 ip_proto; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9d4c7201400..e187bf06d67 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -140,6 +140,8 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); -- cgit v1.2.3-70-g09d2 From 283951f95b067877ca5ea77afaa212bb1e0507b5 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Tue, 19 Mar 2013 08:19:29 +0000 Subject: ipconfig: Fix newline handling in log message. When using ipconfig the logs currently look like: Single name server: [ 3.467270] IP-Config: Complete: [ 3.470613] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.480670] host=infigo-1, domain=, nis-domain=(none) [ 3.486166] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.492910] nameserver0=172.16.42.1[ 3.496853] ALSA device list: Three name servers: [ 3.496949] IP-Config: Complete: [ 3.500293] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.510367] host=infigo-1, domain=, nis-domain=(none) [ 3.515864] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.522635] nameserver0=172.16.42.1, nameserver1=172.16.42.100 [ 3.529149] , nameserver2=172.16.42.200 Fix newline handling for these cases Signed-off-by: Martin Fuzzey Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 98cbc687701..bf6c5cf31ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.2.3-70-g09d2 From 896ee0eee6261e30c3623be931c3f621428947df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Mar 2013 05:19:24 +0000 Subject: net/irda: add missing error path release_sock call This makes sure that release_sock is called for all error conditions in irda_getsockopt. Signed-off-by: Kees Cook Reported-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/irda/af_irda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d07e3a62644..d28e7f014cc 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2583,8 +2583,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; -- cgit v1.2.3-70-g09d2 From ce1eadda6badef9e4e3460097ede674fca47383d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2013 20:26:57 +0100 Subject: cfg80211: fix wdev tracing crash Arend reported a crash in tracing if the driver returns an ERR_PTR() value from the add_virtual_intf() callback. This is due to the tracing then still attempting to dereference the "pointer", fix this by using IS_ERR_OR_NULL(). Reported-by: Arend van Spriel Tested-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6847d043ede..7586de77a2f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -27,7 +27,8 @@ #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) +#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ + ? wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) -- cgit v1.2.3-70-g09d2 From 8b305780ed0c49a49c6bd58a4372fd6b22a5a71e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Mar 2013 22:25:17 +0100 Subject: mac80211: fix virtual monitor interface locking The virtual monitor interface has a locking issue, it calls into the channel context code with the iflist mutex held which isn't allowed since it is usually acquired the other way around. The mutex is still required for the interface iteration, but need not be held across the channel calls. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index baaa8608e52..3bfe2612c8c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -349,21 +349,19 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return 0; - mutex_lock(&local->iflist_mtx); + ASSERT_RTNL(); if (local->monitor_sdata) - goto out_unlock; + return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); - if (!sdata) { - ret = -ENOMEM; - goto out_unlock; - } + if (!sdata) + return -ENOMEM; /* set up data */ sdata->local = local; @@ -377,13 +375,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! */ kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_check_queues(sdata); if (ret) { kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, @@ -391,13 +389,14 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (ret) { drv_remove_interface(local, sdata); kfree(sdata); - goto out_unlock; + return ret; } + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); - out_unlock: mutex_unlock(&local->iflist_mtx); - return ret; + + return 0; } static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) @@ -407,14 +406,20 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return; + ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); - if (!sdata) - goto out_unlock; + if (!sdata) { + mutex_unlock(&local->iflist_mtx); + return; + } rcu_assign_pointer(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); ieee80211_vif_release_channel(sdata); @@ -422,8 +427,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) drv_remove_interface(local, sdata); kfree(sdata); - out_unlock: - mutex_unlock(&local->iflist_mtx); } /* -- cgit v1.2.3-70-g09d2 From f4541d60a449afd40448b06496dcd510f505928e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Mar 2013 17:36:09 +0000 Subject: tcp: preserve ACK clocking in TSO A long standing problem with TSO is the fact that tcp_tso_should_defer() rearms the deferred timer, while it should not. Current code leads to following bad bursty behavior : 20:11:24.484333 IP A > B: . 297161:316921(19760) ack 1 win 119 20:11:24.484337 IP B > A: . ack 263721 win 1117 20:11:24.485086 IP B > A: . ack 265241 win 1117 20:11:24.485925 IP B > A: . ack 266761 win 1117 20:11:24.486759 IP B > A: . ack 268281 win 1117 20:11:24.487594 IP B > A: . ack 269801 win 1117 20:11:24.488430 IP B > A: . ack 271321 win 1117 20:11:24.489267 IP B > A: . ack 272841 win 1117 20:11:24.490104 IP B > A: . ack 274361 win 1117 20:11:24.490939 IP B > A: . ack 275881 win 1117 20:11:24.491775 IP B > A: . ack 277401 win 1117 20:11:24.491784 IP A > B: . 316921:332881(15960) ack 1 win 119 20:11:24.492620 IP B > A: . ack 278921 win 1117 20:11:24.493448 IP B > A: . ack 280441 win 1117 20:11:24.494286 IP B > A: . ack 281961 win 1117 20:11:24.495122 IP B > A: . ack 283481 win 1117 20:11:24.495958 IP B > A: . ack 285001 win 1117 20:11:24.496791 IP B > A: . ack 286521 win 1117 20:11:24.497628 IP B > A: . ack 288041 win 1117 20:11:24.498459 IP B > A: . ack 289561 win 1117 20:11:24.499296 IP B > A: . ack 291081 win 1117 20:11:24.500133 IP B > A: . ack 292601 win 1117 20:11:24.500970 IP B > A: . ack 294121 win 1117 20:11:24.501388 IP B > A: . ack 295641 win 1117 20:11:24.501398 IP A > B: . 332881:351881(19000) ack 1 win 119 While the expected behavior is more like : 20:19:49.259620 IP A > B: . 197601:202161(4560) ack 1 win 119 20:19:49.260446 IP B > A: . ack 154281 win 1212 20:19:49.261282 IP B > A: . ack 155801 win 1212 20:19:49.262125 IP B > A: . ack 157321 win 1212 20:19:49.262136 IP A > B: . 202161:206721(4560) ack 1 win 119 20:19:49.262958 IP B > A: . ack 158841 win 1212 20:19:49.263795 IP B > A: . ack 160361 win 1212 20:19:49.264628 IP B > A: . ack 161881 win 1212 20:19:49.264637 IP A > B: . 206721:211281(4560) ack 1 win 119 20:19:49.265465 IP B > A: . ack 163401 win 1212 20:19:49.265886 IP B > A: . ack 164921 win 1212 20:19:49.266722 IP B > A: . ack 166441 win 1212 20:19:49.266732 IP A > B: . 211281:215841(4560) ack 1 win 119 20:19:49.267559 IP B > A: . ack 167961 win 1212 20:19:49.268394 IP B > A: . ack 169481 win 1212 20:19:49.269232 IP B > A: . ack 171001 win 1212 20:19:49.269241 IP A > B: . 215841:221161(5320) ack 1 win 119 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Van Jacobson Cc: Neal Cardwell Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 817fbb396bc..5d0b4387cba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1809,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; -- cgit v1.2.3-70-g09d2 From f9f475292dbb0e7035fb6661d1524761ea0888d9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2013 15:04:07 +0100 Subject: cfg80211: always check for scan end on P2P device If a P2P device wdev is removed while it has a scan, then the scan completion might crash later as it is already freed by that time. To avoid the crash always check the scan completion when the P2P device is being removed for some reason. If the driver already canceled it, don't want and free it, otherwise warn and leak it to avoid later crashes. In order to do this, locking needs to be changed away from the rdev mutex (which can't always be guaranteed). For now, use the sched_scan_mtx instead, I'll rename it to just scan_mtx in a later patch. Signed-off-by: Johannes Berg --- net/wireless/core.c | 64 ++++++++++++++++++++++++++++++++++++------------- net/wireless/core.h | 3 +++ net/wireless/nl80211.c | 52 +++++++++++++++++++++------------------- net/wireless/scan.c | 8 ++++--- net/wireless/sme.c | 6 +++-- net/wireless/wext-sme.c | 6 +++++ 6 files changed, 92 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 92200210506..11743d48cbc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -212,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) rdev_rfkill_poll(rdev); } +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + lockdep_assert_held(&rdev->devlist_mtx); + lockdep_assert_held(&rdev->sched_scan_mtx); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) + return; + + if (!wdev->p2p_started) + return; + + rdev_stop_p2p_device(rdev, wdev); + wdev->p2p_started = false; + + rdev->opencount--; + + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + bool busy = work_busy(&rdev->scan_done_wk); + + /* + * If the work isn't pending or running (in which case it would + * be waiting for the lock we hold) the driver didn't properly + * cancel the scan when the interface was removed. In this case + * warn and leak the scan request object to not crash later. + */ + WARN_ON(!busy); + + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !busy); + } +} + static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; @@ -221,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; rtnl_lock(); - mutex_lock(&rdev->devlist_mtx); + + /* read-only iteration need not hold the devlist_mtx */ list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -231,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + /* but this requires it */ + mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); + mutex_unlock(&rdev->devlist_mtx); break; default: break; } } - mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); return 0; @@ -745,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work) wdev = container_of(work, struct wireless_dev, cleanup_work); rdev = wiphy_to_dev(wdev->wiphy); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } - cfg80211_unlock_rdev(rdev); - - mutex_lock(&rdev->sched_scan_mtx); - if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); @@ -781,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) return; mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + cfg80211_stop_p2p_device(rdev, wdev); break; default: WARN_ON_ONCE(1); break; } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -937,6 +965,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -968,6 +997,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index 3aec0e429d8..5845c2b37aa 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -503,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d44ab216c0e..58e13a8c95f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4702,14 +4702,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) - return -EBUSY; + mutex_lock(&rdev->sched_scan_mtx); + if (rdev->scan_req) { + err = -EBUSY; + goto unlock; + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); - if (!n_channels) - return -EINVAL; + if (!n_channels) { + err = -EINVAL; + goto unlock; + } } else { enum ieee80211_band band; n_channels = 0; @@ -4723,23 +4728,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) - return -EINVAL; + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto unlock; + } if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) - return -EINVAL; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto unlock; + } request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) - return -ENOMEM; + if (!request) { + err = -ENOMEM; + goto unlock; + } if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -4876,6 +4887,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } + unlock: + mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -7749,20 +7762,9 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - if (!wdev->p2p_started) - return 0; - - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); return 0; } @@ -8486,7 +8488,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (WARN_ON(!req)) return 0; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e93bd31d23b..fd99ea495b7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); ___cfg80211_scan_done(rdev, false); - cfg80211_unlock_rdev(rdev); + mutex_unlock(&rdev->sched_scan_mtx); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -1062,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1168,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: + mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f432bd3755b..09d994d192f 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -85,6 +85,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -320,11 +321,9 @@ void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); wdev_lock(wdev); __cfg80211_sme_scan_done(dev); wdev_unlock(wdev); - mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); } void cfg80211_sme_rx_auth(struct net_device *dev, @@ -924,9 +923,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int err; mutex_lock(&rdev->devlist_mtx); + /* might request scan - scan_mtx -> wdev_mtx dependency */ + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fb9622f6d99..e79cb5c0655 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; -- cgit v1.2.3-70-g09d2 From 370bd005937c0e00f9104a602f9fe1dd6b21b54b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 19 Mar 2013 17:50:50 -0700 Subject: mac80211: Don't restart sta-timer if not associated. I found another crash when deleting lots of virtual stations in a congested environment. I think the problem is that the ieee80211_mlme_notify_scan_completed could call ieee80211_restart_sta_timer for a stopped interface that was about to be deleted. With the following patch I am unable to reproduce the crash. Signed-off-by: Ben Greear [move check, also make the same change in mesh] Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 ++- net/mac80211/mlme.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 29ce2aa87e7..4749b385869 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1060,7 +1060,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_sdata_running(sdata)) ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 141577412d8..82cc30318a8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3608,8 +3608,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) /* Restart STA timers */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_restart_sta_timer(sdata); + } rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2013 14:38:28 +0000 Subject: net: remove a WARN_ON() in net_enable_timestamp() The WARN_ON(in_interrupt()) in net_enable_timestamp() can get false positive, in socket clone path, run from softirq context : [ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80() [ 3641.668811] Call Trace: [ 3641.671254] [] warn_slowpath_common+0x87/0xc0 [ 3641.677871] [] warn_slowpath_null+0x1a/0x20 [ 3641.683683] [] net_enable_timestamp+0x7b/0x80 [ 3641.689668] [] sk_clone_lock+0x425/0x450 [ 3641.695222] [] inet_csk_clone_lock+0x16/0x170 [ 3641.701213] [] tcp_create_openreq_child+0x29/0x820 [ 3641.707663] [] ? ipt_do_table+0x222/0x670 [ 3641.713354] [] tcp_v4_syn_recv_sock+0xab/0x3d0 [ 3641.719425] [] tcp_check_req+0x3da/0x530 [ 3641.724979] [] ? inet_hashinfo_init+0x60/0x80 [ 3641.730964] [] ? tcp_v4_rcv+0x79f/0xbe0 [ 3641.736430] [] tcp_v4_do_rcv+0x38d/0x4f0 [ 3641.741985] [] tcp_v4_rcv+0xa7a/0xbe0 Its safe at this point because the parent socket owns a reference on the netstamp_needed, so we cant have a 0 -> 1 transition, which requires to lock a mutex. Instead of refining the check, lets remove it, as all known callers are safe. If it ever changes in the future, static_key_slow_inc() will complain anyway. Reported-by: Laurent Chavey Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d540ced1f6c..b13e5c766c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1545,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); -- cgit v1.2.3-70-g09d2 From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2013 19:14:07 +0000 Subject: 8021q: fix a potential use-after-free vlan_vid_del() could possibly free ->vlan_info after a RCU grace period, however, we may still refer to the freed memory area by 'grp' pointer. Found by code inspection. This patch moves vlan_vid_del() as behind as possible. Cc: Patrick McHardy Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a18714469bf..85addcd9372 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -- cgit v1.2.3-70-g09d2 From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Sat, 23 Mar 2013 02:27:50 +0000 Subject: bridge: fix crash when set mac address of br interface When I tried to set mac address of a bridge interface to a mac address which already learned on this bridge, I got system hang. The cause is straight forward: function br_fdb_change_mac_address calls fdb_insert with NULL source nbp. Then an fdb lookup is performed. If an fdb entry is found and it's local, it's OK. But if it's not local, source is dereferenced for printk without NULL check. Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c91c0f..bab338e6270 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } -- cgit v1.2.3-70-g09d2 From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 24 Mar 2013 10:42:25 +0000 Subject: tcp: undo spurious timeout after SACK reneging On SACK reneging the sender immediately retransmits and forces a timeout but disables Eifel (undo). If the (buggy) receiver does not drop any packet this can trigger a false slow-start retransmit storm driven by the ACKs of the original packets. This can be detected with undo and TCP timestamps. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99..3bd55bad230 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } -- cgit v1.2.3-70-g09d2 From 799ef90c55e692e096d8bd9e5871b95264b1e9ba Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 20 Mar 2013 22:59:11 +0100 Subject: xfrm: Fix esn sequence number diff calculation in xfrm_replay_notify_esn() Commit 0017c0b "xfrm: Fix replay notification for esn." is off by one for the sequence number wrapped case as UINT_MAX is 0xffffffff, not 0x100000000. ;) Just calculate the diff like done everywhere else in the file. Signed-off-by: Mathias Krause Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index a3906737f49..8dafe6d3c6e 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -359,14 +359,12 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) if (replay_esn->seq_hi == preplay_esn->seq_hi) seq_diff = replay_esn->seq - preplay_esn->seq; else - seq_diff = UINT_MAX - preplay_esn->seq - + replay_esn->seq; + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; if (replay_esn->oseq_hi == preplay_esn->oseq_hi) oseq_diff = replay_esn->oseq - preplay_esn->oseq; else - oseq_diff = UINT_MAX - preplay_esn->oseq - + replay_esn->oseq; + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; if (seq_diff < x->replay_maxdiff && oseq_diff < x->replay_maxdiff) { -- cgit v1.2.3-70-g09d2 From 3fbd45ca8d1c98f3c2582ef8bc70ade42f70947b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2013 11:51:14 +0100 Subject: mac80211: fix remain-on-channel cancel crash If a ROC item is canceled just as it expires, the work struct may be scheduled while it is running (and waiting for the mutex). This results in it being run after being freed, which obviously crashes. To fix this don't free it when aborting is requested but instead mark it as "to be freed", which makes the work a no-op and allows freeing it outside. Cc: stable@vger.kernel.org [3.6+] Reported-by: Jouni Malinen Tested-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 ++++-- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/offchannel.c | 23 +++++++++++++++++------ 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb306814576..a6893602f87 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2582,7 +2582,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, list_del(&dep->list); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); return 0; } @@ -2622,7 +2622,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(found); + ieee80211_roc_notify_destroy(found, true); } else { /* work may be pending so use it all the time */ found->abort = true; @@ -2632,6 +2632,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, /* work will clean up etc */ flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); } return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 388580a1bad..7bdefd901f9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -309,6 +309,7 @@ struct ieee80211_roc_work { struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; + bool to_be_freed; unsigned long hw_start_time; @@ -1347,7 +1348,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cc79b4a2e82..430bd254e49 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -297,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) { struct ieee80211_roc_work *dep, *tmp; + if (WARN_ON(roc->to_be_freed)) + return; + /* was never transmitted */ if (roc->frame) { cfg80211_mgmt_tx_status(&roc->sdata->wdev, @@ -316,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); - kfree(roc); + if (free) + kfree(roc); + else + roc->to_be_freed = true; } void ieee80211_sw_roc_work(struct work_struct *work) @@ -331,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work) mutex_lock(&local->mtx); + if (roc->to_be_freed) + goto out_unlock; + if (roc->abort) goto finish; @@ -370,7 +379,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, !roc->abort); if (started) { drv_flush(local, false); @@ -410,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work) list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); @@ -460,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_for_each_entry_safe(roc, tmp, &tmp_list, list) { if (local->ops->remain_on_channel) { list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); } else { ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); /* work will clean up etc */ flush_delayed_work(&roc->work); + WARN_ON(!roc->to_be_freed); + kfree(roc); } } -- cgit v1.2.3-70-g09d2 From 558724a5b2a73ad0c7638e21e8dffc419d267b6c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 01:28:18 +0000 Subject: netfilter: nfnetlink_queue: fix error return code in nfnetlink_queue_init() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 1cb48540f86..42680b2baa1 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -1062,8 +1062,10 @@ static int __init nfnetlink_queue_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + proc_net_netfilter, &nfqnl_file_ops)) { + status = -ENOMEM; goto cleanup_subsys; + } #endif register_netdevice_notifier(&nfqnl_dev_notifier); -- cgit v1.2.3-70-g09d2 From deadcfc3324410726cd6a663fb4fc46be595abe7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 23 Mar 2013 16:57:59 +0100 Subject: netfilter: nfnetlink_acct: return -EINVAL if object name is empty If user-space tries to create accounting object with an empty name, then return -EINVAL. Reported-by: Michael Zintakis Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 589d686f0b4..dc3fd5d4446 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) -- cgit v1.2.3-70-g09d2 From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Mar 2013 11:23:40 -0400 Subject: SUNRPC: Add barriers to ensure read ordering in rpc_wake_up_task_queue_locked We need to be careful when testing task->tk_waitqueue in rpc_wake_up_task_queue_locked, because it can be changed while we are holding the queue->lock. By adding appropriate memory barriers, we can ensure that it is safe to test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec..f8529fc8e54 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* -- cgit v1.2.3-70-g09d2 From 382a103b2b528a3085cde4ac56fc69d92a828b72 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Mar 2013 22:30:09 +0100 Subject: mac80211: fix idle handling sequence Corey Richardson reported that my idle handling cleanup (commit fd0f979a1b, "mac80211: simplify idle handling") broke ath9k_htc. The reason appears to be that it wants to go out of idle before switching channels. To fix it, reimplement that sequence. Reported-by: Corey Richardson Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 17 ++++++++++++++--- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 78c0d90dd64..931be419ab5 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -63,6 +63,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; + u32 changed; int err; lockdep_assert_held(&local->chanctx_mtx); @@ -76,6 +77,13 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; + /* acquire mutex to prevent idle from changing */ + mutex_lock(&local->mtx); + /* turn idle off *before* setting channel -- some drivers need that */ + changed = ieee80211_idle_off(local); + if (changed) + ieee80211_hw_config(local, changed); + if (!local->use_chanctx) { local->_oper_channel_type = cfg80211_get_chandef_type(chandef); @@ -85,14 +93,17 @@ ieee80211_new_chanctx(struct ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - return ERR_PTR(err); + ctx = ERR_PTR(err); + + ieee80211_recalc_idle(local); + goto out; } } + /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); + out: mutex_unlock(&local->mtx); return ctx; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7bdefd901f9..5672533a083 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1362,6 +1362,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3bfe2612c8c..58150f877ec 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local) +u32 ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; -- cgit v1.2.3-70-g09d2 From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163e..f153a8d6e33 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.2.3-70-g09d2 From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Tue, 26 Mar 2013 01:52:45 +0800 Subject: ipv6: fix bad free of addrconf_init_net Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e615f90..26512250e09 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.2.3-70-g09d2 From 39a352a5b5896403ad4ce842a9bc3845a01c02cd Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 26 Mar 2013 14:35:57 +0100 Subject: NFC: llcp: Keep the connected socket parent pointer alive And avoid decreasing the ack log twice when dequeueing connected LLCP sockets. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index e488e440186..8f025746f33 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -270,7 +270,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } if (sk->sk_state == LLCP_CONNECTED || !newsock) { - nfc_llcp_accept_unlink(sk); + list_del_init(&lsk->accept_queue); + sock_put(sk); + if (newsock) sock_graft(sk, newsock); -- cgit v1.2.3-70-g09d2 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL SCM_SCREDENTIALS should apply to write() syscalls only either source or destination socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong, and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Origionally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e33..971282b6f6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.3-70-g09d2 From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99..982141c1520 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -77,15 +77,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f766..c929d9c1c4b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.3-70-g09d2 From 5389090b59f7f72a30e25f5fd1fc560340543970 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 27 Mar 2013 03:57:10 +0000 Subject: netfilter: nf_conntrack: fix error return code Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in function nf_conntrack_standalone_init(). Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce401fd1..fedee394366 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -568,6 +568,7 @@ static int __init nf_conntrack_standalone_init(void) register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) { pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; goto out_sysctl; } #endif -- cgit v1.2.3-70-g09d2 From fcca143d696092110ae1e361866576804fe887f3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 27 Mar 2013 03:22:45 +0000 Subject: rtnetlink: fix error return code in rtnl_link_fill() Fix to return a negative error code from the error handling case instead of 0(possible overwrite to 0 by ops->fill_xstats call), as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5fb8d7e4729..b65441da74a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; -- cgit v1.2.3-70-g09d2 From ea872d7712528ad991bdabb63515bc00ee10993e Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Wed, 27 Mar 2013 05:41:59 +0000 Subject: sch: add missing u64 in psched_ratecfg_precompute() It seems that commit commit 292f1c7ff6cc10516076ceeea45ed11833bb71c7 Author: Jiri Pirko Date: Tue Feb 12 00:12:03 2013 +0000 sch: make htb_rate_cfg and functions around that generic adds little regression. Before: # tc qdisc add dev eth0 root handle 1: htb default ffff # tc class add dev eth0 classid 1:ffff htb rate 5Gbit # tc -s class show dev eth0 class htb 1:ffff root prio 0 rate 5000Mbit ceil 5000Mbit burst 625b cburst 625b Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) rate 0bit 0pps backlog 0b 0p requeues 0 lended: 0 borrowed: 0 giants: 0 tokens: 31 ctokens: 31 After: # tc qdisc add dev eth0 root handle 1: htb default ffff # tc class add dev eth0 classid 1:ffff htb rate 5Gbit # tc -s class show dev eth0 class htb 1:ffff root prio 0 rate 1544Mbit ceil 1544Mbit burst 625b cburst 625b Sent 5073 bytes 41 pkt (dropped 0, overlimits 0 requeues 0) rate 1976bit 2pps backlog 0b 0p requeues 0 lended: 41 borrowed: 0 giants: 0 tokens: 1802 ctokens: 1802 This probably due to lost u64 cast of rate parameter in psched_ratecfg_precompute() (net/sched/sch_generic.c). Signed-off-by: Sergey Popovich Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ffad48109a2..eac7e0ee23c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -904,7 +904,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) u64 mult; int shift; - r->rate_bps = rate << 3; + r->rate_bps = (u64)rate << 3; r->shift = 0; r->mult = 1; /* -- cgit v1.2.3-70-g09d2 From 1c4a154e5253687c51123956dfcee9e9dfa8542d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 26 Mar 2013 08:13:34 +0000 Subject: ipv6: don't accept node local multicast traffic from the wire Erik Hugne's errata proposal (Errata ID: 3480) to RFC4291 has been verified: http://www.rfc-editor.org/errata_search.php?eid=3480 We have to check for pkt_type and loopback flag because either the packets are allowed to travel over the loopback interface (in which case pkt_type is PACKET_HOST and IFF_LOOPBACK flag is set) or they travel over a non-loopback interface back to us (in which case PACKET_TYPE is PACKET_LOOPBACK and IFF_LOOPBACK flag is not set). Cc: Erik Hugne Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e33fe0ab256..2bab2aa5974 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,18 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. Packets with interface-local scope received + * from another node must be discarded. + */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it -- cgit v1.2.3-70-g09d2 From 50eab0503a7579ada512e4968738b7c9737cf36e Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 27 Mar 2013 23:42:41 +0000 Subject: net: fix the use of this_cpu_ptr flush_tasklet is not percpu var, and percpu is percpu var, and this_cpu_ptr(&info->cache->percpu->flush_tasklet) is not equal to &this_cpu_ptr(info->cache->percpu)->flush_tasklet 1f743b076(use this_cpu_ptr per-cpu helper) introduced this bug. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/core/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index c56ea6f7f6c..2bfd081c59f 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -328,7 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } -- cgit v1.2.3-70-g09d2 From a561cf7edf9863198bfccecfc5cfe26d951ebd20 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 27 Mar 2013 23:13:26 +0000 Subject: net: core: Remove redundant call to 'nf_reset' in 'dev_forward_skb' 'nf_reset' is called just prior calling 'netif_rx'. No need to call it twice. Reported-by: Igor Michailov Signed-off-by: Shmulik Ladkani Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b13e5c766c1..6591440cc03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1624,7 +1624,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); -- cgit v1.2.3-70-g09d2 From cd68ddd4c29ab523440299f24ff2417fe7a0dca6 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 28 Mar 2013 13:52:00 +0000 Subject: net: fq_codel: Fix off-by-one error Currently, we hold a max of sch->limit -1 number of packets instead of sch->limit packets. Fix this off-by-one error. Signed-off-by: Vijay Subramanian Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fcb253..55786283a3d 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; -- cgit v1.2.3-70-g09d2 From 00cfec37484761a44a3b6f4675a54caa618210ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Mar 2013 03:01:22 +0000 Subject: net: add a synchronize_net() in netdev_rx_handler_unregister() commit 35d48903e97819 (bonding: fix rx_handler locking) added a race in bonding driver, reported by Steven Rostedt who did a very good diagnosis : I'm currently debugging a crash in an old 3.0-rt kernel that one of our customers is seeing. The bug happens with a stress test that loads and unloads the bonding module in a loop (I don't know all the details as I'm not the one that is directly interacting with the customer). But the bug looks to be something that may still be present and possibly present in mainline too. It will just be much harder to trigger it in mainline. In -rt, interrupts are threads, and can schedule in and out just like any other thread. Note, mainline now supports interrupt threads so this may be easily reproducible in mainline as well. I don't have the ability to tell the customer to try mainline or other kernels, so my hands are somewhat tied to what I can do. But according to a core dump, I tracked down that the eth irq thread crashed in bond_handle_frame() here: slave = bond_slave_get_rcu(skb->dev); bond = slave->bond; <--- BUG the slave returned was NULL and accessing slave->bond caused a NULL pointer dereference. Looking at the code that unregisters the handler: void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } Which is basically: dev->rx_handler = NULL; dev->rx_handler_data = NULL; And looking at __netif_receive_skb() we have: rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } switch (rx_handler(&skb)) { My question to all of you is, what stops this interrupt from happening while the bonding module is unloading? What happens if the interrupt triggers and we have this: CPU0 CPU1 ---- ---- rx_handler = skb->dev->rx_handler netdev_rx_handler_unregister() { dev->rx_handler = NULL; dev->rx_handler_data = NULL; rx_handler() bond_handle_frame() { slave = skb->dev->rx_handler; bond = slave->bond; <-- NULL pointer dereference!!! What protection am I missing in the bond release handler that would prevent the above from happening? We can fix bug this in two ways. First is adding a test in bond_handle_frame() and others to check if rx_handler_data is NULL. A second way is adding a synchronize_net() in netdev_rx_handler_unregister() to make sure that a rcu protected reader has the guarantee to see a non NULL rx_handler_data. The second way is better as it avoids an extra test in fast path. Reported-by: Steven Rostedt Signed-off-by: Eric Dumazet Cc: Jiri Pirko Cc: Paul E. McKenney Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6591440cc03..13e6447f039 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3313,6 +3313,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3333,6 +3334,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. + */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -- cgit v1.2.3-70-g09d2 From f0f6ee1f70c4eaab9d52cf7d255df4bd89f8d1c2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 1 Apr 2013 03:01:32 +0000 Subject: cbq: incorrect processing of high limits currently cbq works incorrectly for limits > 10% real link bandwidth, and practically does not work for limits > 50% real link bandwidth. Below are results of experiments taken on 1 Gbit link In shaper | Actual Result -----------+--------------- 100M | 108 Mbps 200M | 244 Mbps 300M | 412 Mbps 500M | 893 Mbps This happen because of q->now changes incorrectly in cbq_dequeue(): when it is called before real end of packet transmitting, L2T is greater than real time delay, q_now gets an extra boost but never compensate it. To fix this problem we prevent change of q->now until its synchronization with real time. Signed-off-by: Vasily Averin Reviewed-by: Alexey Kuznetsov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 13aa47aa2ff..1bc210ffcba 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { -- cgit v1.2.3-70-g09d2 From 25fb6ca4ed9cad72f14f61629b68dc03c0d9713f Mon Sep 17 00:00:00 2001 From: Balakumaran Kannan Date: Tue, 2 Apr 2013 16:15:05 +0530 Subject: net IPv6 : Fix broken IPv6 routing table after loopback down-up IPv6 Routing table becomes broken once we do ifdown, ifup of the loopback(lo) interface. After down-up, routes of other interface's IPv6 addresses through 'lo' are lost. IPv6 addresses assigned to all interfaces are routed through 'lo' for internal communication. Once 'lo' is down, those routing entries are removed from routing table. But those removed entries are not being re-created properly when 'lo' is brought up. So IPv6 addresses of other interfaces becomes unreachable from the same machine. Also this breaks communication with other machines because of NDISC packet processing failure. This patch fixes this issue by reading all interface's IPv6 addresses and adding them to IPv6 routing table while bringing up 'lo'. ==Testing== Before applying the patch: $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ sudo ifdown lo $ sudo ifup lo $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ After applying the patch: $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ sudo ifdown lo $ sudo ifup lo $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ Signed-off-by: Balakumaran Kannan Signed-off-by: Maruthi Thotad Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 26512250e09..a459c4f5b76 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2529,6 +2529,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2540,6 +2543,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) + continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) -- cgit v1.2.3-70-g09d2 From 990454b5a48babde44a23c0f22bae5523f4fdf13 Mon Sep 17 00:00:00 2001 From: Reilly Grant Date: Mon, 1 Apr 2013 11:41:52 -0700 Subject: VSOCK: Handle changes to the VMCI context ID. The VMCI context ID of a virtual machine may change at any time. There is a VMCI event which signals this but datagrams may be processed before this is handled. It is therefore necessary to be flexible about the destination context ID of any datagrams received. (It can be assumed to be correct because it is provided by the hypervisor.) The context ID on existing sockets should be updated to reflect how the hypervisor is currently referring to the system. Signed-off-by: Reilly Grant Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- net/vmw_vsock/vmci_transport.c | 31 ++++++++++++++++++++----------- net/vmw_vsock/vsock_addr.c | 10 ---------- net/vmw_vsock/vsock_addr.h | 2 -- 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ca511c4f388..d8079daf1bd 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -207,7 +207,7 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) - if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + if (addr->svm_port == vsk->local_addr.svm_port) return sk_vsock(vsk); return NULL; @@ -220,8 +220,8 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, list_for_each_entry(vsk, vsock_connected_sockets(src, dst), connected_table) { - if (vsock_addr_equals_addr(src, &vsk->remote_addr) - && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { return sk_vsock(vsk); } } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index a70ace83a15..1f6508e249a 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -464,19 +464,16 @@ static struct sock *vmci_transport_get_pending( struct vsock_sock *vlistener; struct vsock_sock *vpending; struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); vlistener = vsock_sk(listener); list_for_each_entry(vpending, &vlistener->pending_links, pending_links) { - struct sockaddr_vm src; - struct sockaddr_vm dst; - - vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); - vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pkt->dst_port == vpending->local_addr.svm_port) { pending = sk_vsock(vpending); sock_hold(pending); goto found; @@ -739,10 +736,15 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ bh_lock_sock(sk); - if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) - vmci_trans(vsk)->notify_ops->handle_notify_pkt( - sk, pkt, true, &dst, &src, - &bh_process_pkt); + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } bh_unlock_sock(sk); @@ -902,6 +904,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) lock_sock(sk); + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + switch (sk->sk_state) { case SS_LISTEN: vmci_transport_recv_listen(sk, pkt); @@ -958,6 +963,10 @@ static int vmci_transport_recv_listen(struct sock *sk, pending = vmci_transport_get_pending(sk, pkt); if (pending) { lock_sock(pending); + + /* The local context ID may be out of date. */ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + switch (pending->sk_state) { case SS_CONNECTING: err = vmci_transport_recv_connecting_server(sk, diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index b7df1aea7c5..ec2611b4ea0 100644 --- a/net/vmw_vsock/vsock_addr.c +++ b/net/vmw_vsock/vsock_addr.c @@ -64,16 +64,6 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, } EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other) -{ - return (addr->svm_cid == VMADDR_CID_ANY || - other->svm_cid == VMADDR_CID_ANY || - addr->svm_cid == other->svm_cid) && - addr->svm_port == other->svm_port; -} -EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); - int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr) { diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h index cdfbcefdf84..9ccd5316eac 100644 --- a/net/vmw_vsock/vsock_addr.h +++ b/net/vmw_vsock/vsock_addr.h @@ -24,8 +24,6 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other); int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr); -- cgit v1.2.3-70-g09d2 From 906b1c394d0906a154fbdc904ca506bceb515756 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 30 Mar 2013 10:23:12 +0000 Subject: netfilter: ip6t_NPT: Fix translation for non-multiple of 32 prefix lengths The bitmask used for the prefix mangling was being calculated incorrectly, leading to the wrong part of the address being replaced when the prefix length wasn't a multiple of 32. Signed-off-by: Matthias Schiffer Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 33608c61027..cb631143721 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -57,7 +57,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; -- cgit v1.2.3-70-g09d2 From 4543fbefe6e06a9e40d9f2b28d688393a299f079 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 2 Apr 2013 17:10:07 -0400 Subject: net: count hw_addr syncs so that unsync works properly. A few drivers use dev_uc_sync/unsync to synchronize the address lists from master down to slave/lower devices. In some cases (bond/team) a single address list is synched down to multiple devices. At the time of unsync, we have a leak in these lower devices, because "synced" is treated as a boolean and the address will not be unsynced for anything after the first device/call. Treat "synced" as a count (same as refcount) and allow all unsync calls to work. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev_addr_lists.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8bfa95600e4..6151e903eef 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -210,9 +210,9 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_SLAVE 3 #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 - bool synced; bool global_use; int refcount; + int synced; struct rcu_head rcu_head; }; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bd2eb9d3e36..abdc9e6ef33 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -37,7 +37,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -165,7 +165,7 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, addr_len, ha->type); if (err) break; - ha->synced = true; + ha->synced++; ha->refcount++; } else if (ha->refcount == 1) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); @@ -186,7 +186,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, if (ha->synced) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - ha->synced = false; + ha->synced--; __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } -- cgit v1.2.3-70-g09d2 From 25da0e3e9d3fb2b522bc2a598076735850310eb1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:13:35 +0000 Subject: Revert "af_unix: dont send SCM_CREDENTIAL when dest socket is NULL" This reverts commit 14134f6584212d585b310ce95428014b653dfaf6. The problem that the above patch was meant to address is that af_unix messages are not being coallesced because we are sending unnecesarry credentials. Not sending credentials in maybe_add_creds totally breaks unconnected unix domain sockets that wish to send credentails to other sockets. In practice this break some versions of udev because they receive a message and the sending uid is bogus so they drop the message. Reported-by: Sven Joachim Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a..f153a8d6e33 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - (other->sk_socket && - test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.3-70-g09d2 From 0e82e7f6dfeec1013339612f74abc2cdd29d43d2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:14:47 +0000 Subject: af_unix: If we don't care about credentials coallesce all messages It was reported that the following LSB test case failed https://lsbbugs.linuxfoundation.org/attachment.cgi?id=2144 because we were not coallescing unix stream messages when the application was expecting us to. The problem was that the first send was before the socket was accepted and thus sock->sk_socket was NULL in maybe_add_creds, and the second send after the socket was accepted had a non-NULL value for sk->socket and thus we could tell the credentials were not needed so we did not bother. The unnecessary credentials on the first message cause unix_stream_recvmsg to start verifying that all messages had the same credentials before coallescing and then the coallescing failed because the second message had no credentials. Ignoring credentials when we don't care in unix_stream_recvmsg fixes a long standing pessimization which would fail to coallesce messages when reading from a unix stream socket if the senders were different even if we did not care about their credentials. I have tested this and verified that the in the LSB test case mentioned above that the messages do coallesce now, while the were failing to coallesce without this change. Reported-by: Karel Srot Reported-by: Ding Tianhong Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e33..2db702d82e7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1993,7 +1993,7 @@ again: if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; -- cgit v1.2.3-70-g09d2 From 34e2ed34a035de07277cca817fe8264324398141 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 08:33:00 +0000 Subject: net: ipv4: notify when address lifetime changes if userspace changes lifetime of address, send netlink notification and call notifier. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f678507bc82..96083b7a436 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -802,8 +802,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; - - set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } -- cgit v1.2.3-70-g09d2 From 124dff01afbdbff251f0385beca84ba1b9adda68 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 5 Apr 2013 20:42:05 +0200 Subject: netfilter: don't reset nf_trace in nf_reset() Commit 130549fe ("netfilter: reset nf_trace in nf_reset") added code to reset nf_trace in nf_reset(). This is wrong and unnecessary. nf_reset() is used in the following cases: - when passing packets up the the socket layer, at which point we want to release all netfilter references that might keep modules pinned while the packet is queued. nf_trace doesn't matter anymore at this point. - when encapsulating or decapsulating IPsec packets. We want to continue tracing these packets after IPsec processing. - when passing packets through virtual network devices. Only devices on that encapsulate in IPv4/v6 matter since otherwise nf_trace is not used anymore. Its not entirely clear whether those packets should be traced after that, however we've always done that. - when passing packets through virtual network devices that make the packet cross network namespace boundaries. This is the only cases where we clearly want to reset nf_trace and is also what the original patch intended to fix. Add a new function nf_reset_trace() and use it in dev_forward_skb() to fix this properly. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ net/core/dev.c | 1 + 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72b396751de..b8292d8cc9f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2641,6 +2641,10 @@ static inline void nf_reset(struct sk_buff *skb) nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; #endif +} + +static inline void nf_reset_trace(struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) skb->nf_trace = 0; #endif diff --git a/net/core/dev.c b/net/core/dev.c index 13e6447f039..e7d68ed8aaf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1639,6 +1639,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit v1.2.3-70-g09d2