From 4aa832c27edf902130f8bace1d42cf22468823fa Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 8 Jan 2012 22:51:16 +0200 Subject: Bluetooth: Remove bogus inline declaration from l2cap_chan_connect As reported by Dan Carpenter this function causes a Sparse warning and shouldn't be declared inline: include/net/bluetooth/l2cap.h:837:30 error: marked inline, but without a definition" Reported-by: Dan Carpenter Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 68f58915069..124f7cf45bd 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -834,7 +834,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(struct sock *sk); void l2cap_chan_close(struct l2cap_chan *chan, int reason); void l2cap_chan_destroy(struct l2cap_chan *chan); -inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, +int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u32 priority); -- cgit v1.2.3-70-g09d2 From 6e1da683f79a22fafaada62d547138daaa9e3456 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 4 Jan 2012 12:10:42 +0100 Subject: Bluetooth: l2cap_set_timer needs jiffies as timeout value After moving L2CAP timers to workqueues l2cap_set_timer expects timeout value to be specified in jiffies but constants defined in miliseconds are used. This makes timeouts unreliable when CONFIG_HZ is not set to 1000. __set_chan_timer macro still uses jiffies as input to avoid multiple conversions from/to jiffies for sk_sndtimeo value which is already specified in jiffies. Signed-off-by: Andrzej Kaczmarek Ackec-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 6 +++--- net/bluetooth/l2cap_core.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 124f7cf45bd..26486e182f5 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -626,13 +626,13 @@ static inline void l2cap_clear_timer(struct l2cap_chan *chan, #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) #define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \ - L2CAP_DEFAULT_RETRANS_TO); + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) #define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \ - L2CAP_DEFAULT_MONITOR_TO); + msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ - L2CAP_DEFAULT_ACK_TO); + msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); #define __clear_ack_timer(c) l2cap_clear_timer(c, &c->ack_timer) static inline int __seq_offset(struct l2cap_chan *chan, __u16 seq1, __u16 seq2) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 980abdb3067..dcccae04ae0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2970,7 +2970,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_err = ECONNRESET; - __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_REJ_TIMEOUT)); l2cap_send_disconn_req(conn, chan, ECONNRESET); goto done; } @@ -4478,7 +4479,8 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) if (encrypt == 0x00) { if (chan->sec_level == BT_SECURITY_MEDIUM) { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_ENC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_ENC_TIMEOUT)); } else if (chan->sec_level == BT_SECURITY_HIGH) l2cap_chan_close(chan, ECONNREFUSED); } else { @@ -4546,7 +4548,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) L2CAP_CONN_REQ, sizeof(req), &req); } else { __clear_chan_timer(chan); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; @@ -4566,7 +4569,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } } else { l2cap_state_change(chan, BT_DISCONN); - __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); + __set_chan_timer(chan, + msecs_to_jiffies(L2CAP_DISC_TIMEOUT)); res = L2CAP_CR_SEC_BLOCK; stat = L2CAP_CS_NO_INFO; } -- cgit v1.2.3-70-g09d2 From 331660637b4e5136602a98200a84f6b65ed8d5be Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 4 Jan 2012 11:57:17 -0300 Subject: Bluetooth: Fix using an absolute timeout on hci_conn_put() queue_delayed_work() expects a relative time for when that work should be scheduled. Signed-off-by: Vinicius Costa Gomes Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ea9231f4935..92b8fea553d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -561,7 +561,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } cancel_delayed_work_sync(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, - &conn->disc_work, jiffies + timeo); + &conn->disc_work, timeo); } } -- cgit v1.2.3-70-g09d2 From b5a30dda6598af216c070165ece6068f9f00f33a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 22 Jan 2012 00:28:34 +0200 Subject: Bluetooth: silence lockdep warning Since bluetooth uses multiple protocols types, to avoid lockdep warnings, we need to use different lockdep classes (one for each protocol type). This is already done in bt_sock_create but it misses a couple of cases when new connections are created. This patch corrects that to fix the following warning: <4>[ 1864.732366] ======================================================= <4>[ 1864.733030] [ INFO: possible circular locking dependency detected ] <4>[ 1864.733544] 3.0.16-mid3-00007-gc9a0f62 #3 <4>[ 1864.733883] ------------------------------------------------------- <4>[ 1864.734408] t.android.btclc/4204 is trying to acquire lock: <4>[ 1864.734869] (rfcomm_mutex){+.+.+.}, at: [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.735541] <4>[ 1864.735549] but task is already holding lock: <4>[ 1864.736045] (sk_lock-AF_BLUETOOTH){+.+.+.}, at: [] lock_sock+0xa/0xc <4>[ 1864.736732] <4>[ 1864.736740] which lock already depends on the new lock. <4>[ 1864.736750] <4>[ 1864.737428] <4>[ 1864.737437] the existing dependency chain (in reverse order) is: <4>[ 1864.738016] <4>[ 1864.738023] -> #1 (sk_lock-AF_BLUETOOTH){+.+.+.}: <4>[ 1864.738549] [] lock_acquire+0x104/0x140 <4>[ 1864.738977] [] lock_sock_nested+0x58/0x68 <4>[ 1864.739411] [] l2cap_sock_sendmsg+0x3e/0x76 <4>[ 1864.739858] [] __sock_sendmsg+0x50/0x59 <4>[ 1864.740279] [] sock_sendmsg+0x94/0xa8 <4>[ 1864.740687] [] kernel_sendmsg+0x28/0x37 <4>[ 1864.741106] [] rfcomm_send_frame+0x30/0x38 <4>[ 1864.741542] [] rfcomm_send_ua+0x58/0x5a <4>[ 1864.741959] [] rfcomm_run+0x441/0xb52 <4>[ 1864.742365] [] kthread+0x63/0x68 <4>[ 1864.742742] [] kernel_thread_helper+0x6/0xd <4>[ 1864.743187] <4>[ 1864.743193] -> #0 (rfcomm_mutex){+.+.+.}: <4>[ 1864.743667] [] __lock_acquire+0x988/0xc00 <4>[ 1864.744100] [] lock_acquire+0x104/0x140 <4>[ 1864.744519] [] __mutex_lock_common+0x3b/0x33f <4>[ 1864.744975] [] mutex_lock_nested+0x2d/0x36 <4>[ 1864.745412] [] rfcomm_dlc_close+0x15/0x30 <4>[ 1864.745842] [] __rfcomm_sock_close+0x5f/0x6b <4>[ 1864.746288] [] rfcomm_sock_shutdown+0x2f/0x62 <4>[ 1864.746737] [] sys_socketcall+0x1db/0x422 <4>[ 1864.747165] [] syscall_call+0x7/0xb Signed-off-by: Octavian Purdila Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 ++ net/bluetooth/af_bluetooth.c | 12 +++++------- net/bluetooth/l2cap_sock.c | 2 ++ net/bluetooth/rfcomm/sock.c | 2 ++ 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index abaad6ed9b8..4a82ca0bb0b 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -256,4 +256,6 @@ void l2cap_exit(void); int sco_init(void); void sco_exit(void); +void bt_sock_reclassify_lock(struct sock *sk, int proto); + #endif /* __BLUETOOTH_H */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ef92864ac62..72eb187a5f6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -71,19 +71,16 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_AVDTP", }; -static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) +void bt_sock_reclassify_lock(struct sock *sk, int proto) { - struct sock *sk = sock->sk; - - if (!sk) - return; - + BUG_ON(!sk); BUG_ON(sock_owned_by_user(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } +EXPORT_SYMBOL(bt_sock_reclassify_lock); int bt_sock_register(int proto, const struct net_proto_family *ops) { @@ -145,7 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto, if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto, kern); - bt_sock_reclassify_lock(sock, proto); + if (!err) + bt_sock_reclassify_lock(sock->sk, proto); module_put(bt_proto[proto]->owner); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c57027f7606..401d9428ae4 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -849,6 +849,8 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) if (!sk) return NULL; + bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); + l2cap_sock_init(sk, parent); return l2cap_pi(sk)->chan; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f066678faee..22169c3f148 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -956,6 +956,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!sk) goto done; + bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM); + rfcomm_sock_init(sk, parent); bacpy(&bt_sk(sk)->src, &src); bacpy(&bt_sk(sk)->dst, &dst); -- cgit v1.2.3-70-g09d2 From a51cd2be864a3cc0272359b1995e213341dfc7e7 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Fri, 27 Jan 2012 19:42:02 -0300 Subject: Bluetooth: Fix potential deadlock We don't need to use the _sync variant in hci_conn_hold and hci_conn_put to cancel conn->disc_work delayed work. This way we avoid potential deadlocks like this one reported by lockdep. ====================================================== [ INFO: possible circular locking dependency detected ] 3.2.0+ #1 Not tainted ------------------------------------------------------- kworker/u:1/17 is trying to acquire lock: (&hdev->lock){+.+.+.}, at: [] hci_conn_timeout+0x62/0x158 [bluetooth] but task is already holding lock: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 ((&(&conn->disc_work)->work)){+.+...}: [] lock_acquire+0x8a/0xa7 [] wait_on_work+0x3d/0xaa [] __cancel_work_timer+0xac/0xef [] cancel_delayed_work_sync+0xd/0xf [] smp_chan_create+0xde/0xe6 [bluetooth] [] smp_conn_security+0xa3/0x12d [bluetooth] [] l2cap_connect_cfm+0x237/0x2e8 [bluetooth] [] hci_proto_connect_cfm+0x2d/0x6f [bluetooth] [] hci_event_packet+0x29d1/0x2d60 [bluetooth] [] hci_rx_work+0xd0/0x2e1 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 -> #1 (slock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+...}: [] lock_acquire+0x8a/0xa7 [] _raw_spin_lock_bh+0x36/0x6a [] lock_sock_nested+0x24/0x7f [] lock_sock+0xb/0xd [bluetooth] [] l2cap_chan_connect+0xa9/0x26f [bluetooth] [] l2cap_sock_connect+0xb3/0xff [bluetooth] [] sys_connect+0x69/0x8a [] system_call_fastpath+0x16/0x1b -> #0 (&hdev->lock){+.+.+.}: [] __lock_acquire+0xa80/0xd74 [] lock_acquire+0x8a/0xa7 [] __mutex_lock_common+0x48/0x38e [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] worker_thread+0xce/0x152 [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 other info that might help us debug this: Chain exists of: &hdev->lock --> slock-AF_BLUETOOTH-BTPROTO_L2CAP --> (&(&conn->disc_work)->work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((&(&conn->disc_work)->work)); lock(slock-AF_BLUETOOTH-BTPROTO_L2CAP); lock((&(&conn->disc_work)->work)); lock(&hdev->lock); *** DEADLOCK *** 2 locks held by kworker/u:1/17: #0: (hdev->name){.+.+.+}, at: [] process_one_work+0x11a/0x2bf #1: ((&(&conn->disc_work)->work)){+.+...}, at: [] process_one_work+0x11a/0x2bf stack backtrace: Pid: 17, comm: kworker/u:1 Not tainted 3.2.0+ #1 Call Trace: [] print_circular_bug+0x1f8/0x209 [] __lock_acquire+0xa80/0xd74 [] ? arch_local_irq_restore+0x6/0xd [] ? vprintk+0x3f9/0x41e [] lock_acquire+0x8a/0xa7 [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] __mutex_lock_common+0x48/0x38e [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? __dynamic_pr_debug+0x6d/0x6f [] ? hci_conn_timeout+0x62/0x158 [bluetooth] [] ? trace_hardirqs_off+0xd/0xf [] mutex_lock_nested+0x2a/0x31 [] hci_conn_timeout+0x62/0x158 [bluetooth] [] process_one_work+0x178/0x2bf [] ? process_one_work+0x11a/0x2bf [] ? lock_acquired+0x1d0/0x1df [] ? hci_acl_disconn+0x65/0x65 [bluetooth] [] worker_thread+0xce/0x152 [] ? finish_task_switch+0x45/0xc5 [] ? manage_workers.isra.25+0x16a/0x16a [] kthread+0x95/0x9d [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? __init_kthread_worker+0x55/0x55 [] ? gs_change+0x13/0x13 Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 92b8fea553d..453893b3120 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -540,7 +540,7 @@ void hci_conn_put_device(struct hci_conn *conn); static inline void hci_conn_hold(struct hci_conn *conn) { atomic_inc(&conn->refcnt); - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_put(struct hci_conn *conn) @@ -559,7 +559,7 @@ static inline void hci_conn_put(struct hci_conn *conn) } else { timeo = msecs_to_jiffies(10); } - cancel_delayed_work_sync(&conn->disc_work); + cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } -- cgit v1.2.3-70-g09d2 From 6de32750822d00bfa92c341166132b0714c5b559 Mon Sep 17 00:00:00 2001 From: Ulisses Furquim Date: Mon, 30 Jan 2012 18:26:28 -0200 Subject: Bluetooth: Remove usage of __cancel_delayed_work() __cancel_delayed_work() is being used in some paths where we cannot sleep waiting for the delayed work to finish. However, that function might return while the timer is running and the work will be queued again. Replace the calls with safer cancel_delayed_work() version which spins until the timer handler finishes on other CPUs and cancels the delayed work. Signed-off-by: Ulisses Furquim Acked-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 4 ++-- net/bluetooth/l2cap_core.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 26486e182f5..b1664ed884e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -611,7 +611,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, { BT_DBG("chan %p state %d timeout %ld", chan, chan->state, timeout); - if (!__cancel_delayed_work(work)) + if (!cancel_delayed_work(work)) l2cap_chan_hold(chan); schedule_delayed_work(work, timeout); } @@ -619,7 +619,7 @@ static inline void l2cap_set_timer(struct l2cap_chan *chan, static inline void l2cap_clear_timer(struct l2cap_chan *chan, struct delayed_work *work) { - if (__cancel_delayed_work(work)) + if (cancel_delayed_work(work)) l2cap_chan_put(chan); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dcccae04ae0..ec10c698b89 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2574,7 +2574,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -3121,7 +3121,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; - __cancel_delayed_work(&conn->info_timer); + cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; @@ -4501,7 +4501,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) if (hcon->type == LE_LINK) { smp_distribute_keys(conn, 0); - __cancel_delayed_work(&conn->security_timer); + cancel_delayed_work(&conn->security_timer); } rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From 88ba136d6635b262f77cc418d536115fb8e4d4ab Mon Sep 17 00:00:00 2001 From: Joerg Willmann Date: Tue, 21 Feb 2012 13:26:14 +0100 Subject: netfilter: ebtables: fix alignment problem in ppc ebt_among extension of ebtables uses __alignof__(_xt_align) while the corresponding kernel module uses __alignof__(ebt_replace) to determine the alignment in EBT_ALIGN(). These are the results of these values on different platforms: x86 x86_64 ppc __alignof__(_xt_align) 4 8 8 __alignof__(ebt_replace) 4 8 4 ebtables fails to add rules which use the among extension. I'm using kernel 2.6.33 and ebtables 2.0.10-4 According to Bart De Schuymer, userspace alignment was changed to _xt_align to fix an alignment issue on a userspace32-kernel64 system (he thinks it was for an ARM device). So userspace must be right. The kernel alignment macro needs to change so it also uses _xt_align instead of ebt_replace. The userspace changes date back from June 29, 2009. Signed-off-by: Joerg Willmann Signed-off by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8797ed16feb..4dd5bd6994a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -285,8 +285,8 @@ struct ebt_table { struct module *me; }; -#define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ - ~(__alignof__(struct ebt_replace)-1)) +#define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ + ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -- cgit v1.2.3-70-g09d2 From 115c9b81928360d769a76c632bae62d15206a94a Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 21 Feb 2012 16:54:48 -0500 Subject: rtnetlink: Fix problem with buffer allocation Implement a new netlink attribute type IFLA_EXT_MASK. The mask is a 32 bit value that can be used to indicate to the kernel that certain extended ifinfo values are requested by the user application. At this time the only mask value defined is RTEXT_FILTER_VF to indicate that the user wants the ifinfo dump to send information about the VFs belonging to the interface. This patch fixes a bug in which certain applications do not have large enough buffers to accommodate the extra information returned by the kernel with large numbers of SR-IOV virtual functions. Those applications will not send the new netlink attribute with the interface info dump request netlink messages so they will not get unexpectedly large request buffers returned by the kernel. Modifies the rtnl_calcit function to traverse the list of net devices and compute the minimum buffer size that can hold the info dumps of all matching devices based upon the filter passed in via the new netlink attribute filter mask. If no filter mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE. With this change it is possible to add yet to be defined netlink attributes to the dump request which should make it fairly extensible in the future. Signed-off-by: Greg Rose Signed-off-by: David S. Miller --- include/linux/if_link.h | 1 + include/linux/rtnetlink.h | 3 ++ include/net/rtnetlink.h | 2 +- net/core/rtnetlink.c | 78 ++++++++++++++++++++++++++++++++++------------- 4 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c52d4b5f872..4b24ff453ae 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -137,6 +137,7 @@ enum { IFLA_AF_SPEC, IFLA_GROUP, /* Group the device belongs to */ IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ __IFLA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead88b..577592ea0ea 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -602,6 +602,9 @@ struct tcamsg { #define TCA_ACT_TAB 1 /* attr type must be >=1 */ #define TCAA_MAX 1 +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) + /* End of information exported to user level */ #ifdef __KERNEL__ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 678f1ffaf84..37029390197 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -6,7 +6,7 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); extern int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd45002..606a6e8f367 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -60,7 +60,6 @@ struct rtnl_link { }; static DEFINE_MUTEX(rtnl_mutex); -static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) } /* All VF info */ -static inline int rtnl_vfinfo_size(const struct net_device *dev) +static inline int rtnl_vfinfo_size(const struct net_device *dev, + u32 ext_filter_mask) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { - + if (dev->dev.parent && dev_is_pci(dev->dev.parent) && + (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); size_t size = nla_total_size(sizeof(struct nlattr)); size += nla_total_size(num_vfs * sizeof(struct nlattr)); @@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev) return port_self_size; } -static noinline size_t if_nlmsg_size(const struct net_device *dev) +static noinline size_t if_nlmsg_size(const struct net_device *dev, + u32 ext_filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ - + nla_total_size(4) /* IFLA_NUM_VF */ - + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + + nla_total_size(ext_filter_mask + & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ @@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, - unsigned int flags) + unsigned int flags, u32 ext_filter_mask) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; copy_rtnl_link_stats64(nla_data(attr), stats); - if (dev->dev.parent) + if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent + && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; struct nlattr *vfinfo, *vf; @@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct hlist_head *head; struct hlist_node *node; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; + nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI) <= 0) + NLM_F_MULTI, + ext_filter_mask) <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, [IFLA_AF_SPEC] = { .type = NLA_NESTED }, + [IFLA_EXT_MASK] = { .type = NLA_U32 }, }; EXPORT_SYMBOL(ifla_policy); @@ -1509,8 +1522,6 @@ errout: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); - min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev), - min_ifinfo_dump_size); return err; } @@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL; struct sk_buff *nskb; int err; + u32 ext_filter_mask = 0; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); @@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (dev == NULL) return -ENODEV; - nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } -static u16 rtnl_calcit(struct sk_buff *skb) +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; + u16 min_ifinfo_dump_size = 0; + + nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy); + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + + if (!ext_filter_mask) + return NLMSG_GOODSIZE; + /* + * traverse the list of net devices and compute the minimum + * buffer size based upon the filter mask. + */ + list_for_each_entry(dev, &net->dev_base_head, dev_list) { + min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, + if_nlmsg_size(dev, + ext_filter_mask)); + } + return min_ifinfo_dump_size; } @@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) int err = -ENOBUFS; size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); if (skb == NULL) goto errout; - min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); - - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; calcit = rtnl_get_calcit(family, type); if (calcit) - min_dump_alloc = calcit(skb); + min_dump_alloc = calcit(skb, nlh); __rtnl_unlock(); rtnl = net->rtnl; -- cgit v1.2.3-70-g09d2 From 03606895cd98c0a628b17324fd7b5ff15db7e3cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2012 10:55:02 +0000 Subject: ipsec: be careful of non existing mac headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Niccolo Belli reported ipsec crashes in case we handle a frame without mac header (atm in his case) Before copying mac header, better make sure it is present. Bugzilla reference: https://bugzilla.kernel.org/show_bug.cgi?id=42809 Reported-by: Niccolò Belli Tested-by: Niccolò Belli Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ net/ipv4/xfrm4_mode_beet.c | 5 +---- net/ipv4/xfrm4_mode_tunnel.c | 6 ++---- net/ipv6/xfrm6_mode_beet.c | 6 +----- net/ipv6/xfrm6_mode_tunnel.c | 6 ++---- 5 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50db9b04a55..ae86adee374 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1465,6 +1465,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_mac_header_rebuild(struct sk_buff *skb) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -skb->mac_len); + memmove(skb_mac_header(skb), old_mac, skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 63418185f52..e3db3f91511 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e114a..ed4bf11ef9f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index a81ce945075..9949a356d62 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -80,7 +80,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -90,10 +89,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 261e6e6f487..9f2095b19ad 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,7 +63,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -80,10 +79,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: -- cgit v1.2.3-70-g09d2 From 7d367e06688dc7a2cc98c2ace04e1296e1d987e2 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 24 Feb 2012 11:45:49 +0100 Subject: netfilter: ctnetlink: fix soft lockup when netlink adds new entries (v2) Marcell Zambo and Janos Farago noticed and reported that when new conntrack entries are added via netlink and the conntrack table gets full, soft lockup happens. This is because the nf_conntrack_lock is held while nf_conntrack_alloc is called, which is in turn wants to lock nf_conntrack_lock while evicting entries from the full table. The patch fixes the soft lockup with limiting the holding of the nf_conntrack_lock to the minimum, where it's absolutely required. It required to extend (and thus change) nf_conntrack_hash_insert so that it makes sure conntrack and ctnetlink do not add the same entry twice to the conntrack table. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 38 +++++++++++++++++++++++++---- net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++++----------------------- 3 files changed, 51 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8a2b0ae7dbd..ab86036bbf0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -209,7 +209,7 @@ extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -extern void nf_conntrack_hash_insert(struct nf_conn *ct); +extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_insert_dying_list(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 76613f5a55c..ed86a3be678 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, &net->ct.hash[repl_hash]); } -void nf_conntrack_hash_insert(struct nf_conn *ct) +int +nf_conntrack_hash_check_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; u16 zone; zone = nf_ct_zone(ct); - hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + spin_lock_bh(&nf_conntrack_lock); + /* See if there's one in the list already, including reverse */ + hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + goto out; + + add_timer(&ct->timeout); + nf_conntrack_get(&ct->ct_general); __nf_conntrack_hash_insert(ct, hash, repl_hash); + NF_CT_STAT_INC(net, insert); + spin_unlock_bh(&nf_conntrack_lock); + + return 0; + +out: + NF_CT_STAT_INC(net, insert_failed); + spin_unlock_bh(&nf_conntrack_lock); + return -EEXIST; } -EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); +EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); /* Confirm a connection given skb; places it in hash table */ int diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b033c0c..30c9d4ca021 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); - spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } - spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1468,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, if (tstamp) tstamp->start = ktime_to_ns(ktime_get_real()); - add_timer(&ct->timeout); - nf_conntrack_hash_insert(ct); + err = nf_conntrack_hash_check_insert(ct); + if (err < 0) + goto err2; + rcu_read_unlock(); return ct; @@ -1490,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; u16 zone; int err; @@ -1510,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } - spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, zone, &otuple); + h = nf_conntrack_find_get(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, zone, &rtuple); + h = nf_conntrack_find_get(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - struct nf_conn *ct; enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); - if (IS_ERR(ct)) { - err = PTR_ERR(ct); - goto out_unlock; - } + if (IS_ERR(ct)) + return PTR_ERR(ct); + err = 0; - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); if (test_bit(IPS_EXPECTED_BIT, &ct->status)) events = IPCT_RELATED; else @@ -1545,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); + } return err; } /* implicit 'else' */ - /* We manipulate the conntrack inside the global conntrack table lock, - * so there's no need to increase the refcount */ err = -EEXIST; + ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - + spin_lock_bh(&nf_conntrack_lock); err = ctnetlink_change_conntrack(ct, cda); + spin_unlock_bh(&nf_conntrack_lock); if (err == 0) { - nf_conntrack_get(&ct->ct_general); - spin_unlock_bh(&nf_conntrack_lock); nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -1570,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_MARK), ct, NETLINK_CB(skb).pid, nlmsg_report(nlh)); - nf_ct_put(ct); - } else - spin_unlock_bh(&nf_conntrack_lock); - - return err; + } } -out_unlock: - spin_unlock_bh(&nf_conntrack_lock); + nf_ct_put(ct); return err; } -- cgit v1.2.3-70-g09d2