From b6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Apr 2010 23:03:29 +0000 Subject: net: sk_dst_cache RCUification With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this work. sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock) This rwlock is readlocked for a very small amount of time, and dst entries are already freed after RCU grace period. This calls for RCU again :) This patch converts sk_dst_lock to a spinlock, and use RCU for readers. __sk_dst_get() is supposed to be called with rcu_read_lock() or if socket locked by user, so use appropriate rcu_dereference_check() condition (rcu_read_lock_held() || sock_owned_by_user(sk)) This patch avoids two atomic ops per tx packet on UDP connected sockets, for example, and permits sk_dst_lock to be much less dirtied. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/sock.c') diff --git a/net/core/sock.c b/net/core/sock.c index c5812bbc2cc..7effa1e689d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -364,11 +364,11 @@ EXPORT_SYMBOL(sk_reset_txq); struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst = sk->sk_dst_cache; + struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - sk->sk_dst_cache = NULL; + rcu_assign_pointer(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -1157,7 +1157,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) skb_queue_head_init(&newsk->sk_async_wait_queue); #endif - rwlock_init(&newsk->sk_dst_lock); + spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); lockdep_set_class_and_name(&newsk->sk_callback_lock, af_callback_keys + newsk->sk_family, @@ -1898,7 +1898,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_sleep = NULL; - rwlock_init(&sk->sk_dst_lock); + spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, -- cgit v1.2.3-70-g09d2 From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 13:03:51 +0000 Subject: net: sk_sleep() helper Define a new function to return the waitqueue of a "struct sock". static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } Change all read occurrences of sk_sleep by a call to this function. Needed for a future RCU conversion. sk_sleep wont be a field directly available. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/atm/atmtcp.c | 6 ++-- drivers/net/macvtap.c | 4 +-- drivers/net/tun.c | 4 +-- drivers/scsi/iscsi_tcp.c | 4 +-- include/net/sock.h | 10 +++++-- include/net/tcp.h | 2 +- net/atm/common.c | 12 ++++---- net/atm/signaling.c | 2 +- net/atm/svc.c | 62 ++++++++++++++++++++--------------------- net/ax25/af_ax25.c | 8 +++--- net/bluetooth/af_bluetooth.c | 6 ++-- net/bluetooth/bnep/core.c | 8 +++--- net/bluetooth/bnep/netdev.c | 6 ++-- net/bluetooth/cmtp/cmtp.h | 2 +- net/bluetooth/cmtp/core.c | 4 +-- net/bluetooth/hidp/core.c | 10 +++---- net/bluetooth/hidp/hidp.h | 4 +-- net/bluetooth/l2cap.c | 4 +-- net/bluetooth/rfcomm/sock.c | 8 +++--- net/bluetooth/sco.c | 4 +-- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 6 ++-- net/core/sock.c | 16 +++++------ net/core/stream.c | 16 +++++------ net/dccp/output.c | 6 ++-- net/dccp/proto.c | 2 +- net/decnet/af_decnet.c | 26 ++++++++--------- net/ipv4/af_inet.c | 6 ++-- net/ipv4/inet_connection_sock.c | 4 +-- net/ipv4/tcp.c | 2 +- net/irda/af_irda.c | 14 +++++----- net/iucv/af_iucv.c | 12 ++++---- net/llc/af_llc.c | 12 ++++---- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/netrom/af_netrom.c | 8 +++--- net/rds/af_rds.c | 2 +- net/rds/rds.h | 2 +- net/rds/recv.c | 2 +- net/rds/send.c | 2 +- net/rose/af_rose.c | 8 +++--- net/rxrpc/af_rxrpc.c | 4 +-- net/sctp/socket.c | 20 ++++++------- net/sunrpc/svcsock.c | 24 ++++++++-------- net/tipc/socket.c | 26 ++++++++--------- net/unix/af_unix.c | 10 +++---- net/x25/af_x25.c | 8 +++--- 46 files changed, 208 insertions(+), 204 deletions(-) (limited to 'net/core/sock.c') diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b86712167eb..b9101818b47 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -68,7 +68,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type, *(struct atm_vcc **) &new_msg->vcc = vcc; old_test = test_bit(flag,&vcc->flags); out_vcc->push(out_vcc,skb); - add_wait_queue(sk_atm(vcc)->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk_atm(vcc)), &wait); while (test_bit(flag,&vcc->flags) == old_test) { mb(); out_vcc = PRIV(vcc->dev) ? PRIV(vcc->dev)->vcc : NULL; @@ -80,7 +80,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type, schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk_atm(vcc)->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk_atm(vcc)), &wait); return error; } @@ -105,7 +105,7 @@ static int atmtcp_recv_control(const struct atmtcp_control *msg) msg->type); return -EINVAL; } - wake_up(sk_atm(vcc)->sk_sleep); + wake_up(sk_sleep(sk_atm(vcc))); return 0; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index abba3cc81f1..85d6420f840 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -246,8 +246,8 @@ static void macvtap_sock_write_space(struct sock *sk) !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file *file) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 43265207d46..20a17938c62 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -868,8 +868,8 @@ static void tun_sock_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 0ee725ced51..9eae04afa9a 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -599,9 +599,9 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock); - if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) { + if (sk_sleep(sock->sk) && waitqueue_active(sk_sleep(sock->sk))) { sock->sk->sk_err = EIO; - wake_up_interruptible(sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(sock->sk)); } iscsi_conn_stop(cls_conn, flag); diff --git a/include/net/sock.h b/include/net/sock.h index 56df440a950..8ab05146a44 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1160,6 +1160,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) sk->sk_socket = sock; } +static inline wait_queue_head_t *sk_sleep(struct sock *sk) +{ + return sk->sk_sleep; +} /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, @@ -1346,8 +1350,8 @@ static inline int sk_has_allocations(const struct sock *sk) * tp->rcv_nxt check sock_def_readable * ... { * schedule ... - * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - * wake_up_interruptible(sk->sk_sleep) + * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + * wake_up_interruptible(sk_sleep(sk)) * ... * } * @@ -1368,7 +1372,7 @@ static inline int sk_has_sleeper(struct sock *sk) * This memory barrier is paired in the sock_poll_wait. */ smp_mb__after_lock(); - return sk->sk_sleep && waitqueue_active(sk->sk_sleep); + return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); } /** diff --git a/include/net/tcp.h b/include/net/tcp.h index 70c5159f4b3..b7d83d204a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/atm/common.c b/net/atm/common.c index 97ed94aa0cb..e3e10e6f862 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up(sk->sk_sleep); + wake_up(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -549,7 +549,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, } eff = (size+3) & ~3; /* align to word boundary */ - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { @@ -568,9 +568,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, send_sig(SIGPIPE, current, 0); break; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; skb->dev = NULL; /* for paths shared with net_device interfaces */ @@ -595,7 +595,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6ba6e466ee5..509c8ac02b6 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk)); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index 3ba9a45a51a..754ee4791d9 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc) pr_debug("%p\n", vcc); if (test_bit(ATM_VF_REGIS, &vcc->flags)) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_close, NULL, NULL, NULL); while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } /* beware - socket is still in use by atmsigd until the last as_indicate has been answered */ @@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, } vcc->local = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ if (!sigd) { error = -EUNATCH; @@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, } vcc->remote = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sock->state = SS_CONNECTING; error = -EINPROGRESS; goto out; @@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); if (!signal_pending(current)) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); continue; } @@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, */ sigd_enq(vcc, as_close, NULL, NULL, NULL); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } if (!sk->sk_err) while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } @@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, error = -EINTR; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!sigd) { @@ -302,13 +302,13 @@ static int svc_listen(struct socket *sock, int backlog) goto out; } set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) while (1) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (!(skb = skb_dequeue(&sk->sk_receive_queue)) && sigd) { if (test_bit(ATM_VF_RELEASED, &old_vcc->flags)) @@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) error = -ERESTARTSYS; break; } - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!skb) { @@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) } /* wait should be short, so we ignore the non-blocking flag */ set_bit(ATM_VF_WAITING, &new_vcc->flags); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { release_sock(sk); schedule(); lock_sock(sk); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk_atm(new_vcc)->sk_sleep, &wait); + finish_wait(sk_sleep(sk_atm(new_vcc)), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) DEFINE_WAIT(wait); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); while (test_bit(ATM_VF_WAITING, &vcc->flags) && !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) return -EUNATCH; return -sk->sk_err; @@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_addparty, NULL, NULL, (struct sockaddr_atmsvc *) sockaddr); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = -EINPROGRESS; goto out; } pr_debug("added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = xchg(&sk->sk_err_soft, 0); out: release_sock(sk); @@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref) lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 65c5801261f..cfdfd7e2a17 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 404a8500fd0..421c45bd1b9 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w BT_DBG("sock %p, sk %p", sock, sk); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (sk->sk_state != state) { set_current_state(TASK_INTERRUPTIBLE); @@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) break; } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 8062dad6d10..f10b41fb05a 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -474,7 +474,7 @@ static int bnep_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&s->killed)) { set_current_state(TASK_INTERRUPTIBLE); @@ -496,7 +496,7 @@ static int bnep_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); @@ -507,7 +507,7 @@ static int bnep_session(void *arg) /* Wakeup user-space polling for socket errors */ s->sock->sk->sk_err = EUNATCH; - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); @@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) /* Kill session thread */ atomic_inc(&s->killed); - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d48b33f4d4b..0faad5ce6dc 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -109,7 +109,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) } skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); #endif } @@ -193,11 +193,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, /* * We cannot send L2CAP packets from here as we are potentially in a bh. * So we have to queue them and wake up session thread which is sleeping - * on the sk->sk_sleep. + * on the sk_sleep(sk). */ dev->trans_start = jiffies; skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) { BT_DBG("tx queue is full"); diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index e4663aa14d2..785e79e953c 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session) { struct sock *sk = session->sock->sk; - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } /* CMTP init defines */ diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0073ec8495d..d4c6af082d4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -284,7 +284,7 @@ static int cmtp_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -301,7 +301,7 @@ static int cmtp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 280529ad927..bfe641b7dfa 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -561,8 +561,8 @@ static int hidp_session(void *arg) init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); - add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); - add_wait_queue(intr_sk->sk_sleep, &intr_wait); + add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); + add_wait_queue(sk_sleep(intr_sk), &intr_wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -584,8 +584,8 @@ static int hidp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(intr_sk->sk_sleep, &intr_wait); - remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); + remove_wait_queue(sk_sleep(intr_sk), &intr_wait); + remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); down_write(&hidp_session_sem); @@ -609,7 +609,7 @@ static int hidp_session(void *arg) fput(session->intr_sock->file); - wait_event_timeout(*(ctrl_sk->sk_sleep), + wait_event_timeout(*(sk_sleep(ctrl_sk)), (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); fput(session->ctrl_sock->file); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index a4e215d50c1..8d934a19da0 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; - wake_up_interruptible(ctrl_sk->sk_sleep); - wake_up_interruptible(intr_sk->sk_sleep); + wake_up_interruptible(sk_sleep(ctrl_sk)); + wake_up_interruptible(sk_sleep(intr_sk)); } /* HIDP init defines */ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c34e4f..c1e60eed5a9 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1147,7 +1147,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -1170,7 +1170,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8ed3c37684f..43fbf6b4b4b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeo; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ca6b2ad1c3f..b406d3eff53 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -567,7 +567,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(ch = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -590,7 +590,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cdf62b9fefa..90317e7d10b 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -689,7 +689,7 @@ static unsigned int caif_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); u32 mask = 0; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); lock_sock(&(cf_sk->sk)); if (!STATE_IS_OPEN(cf_sk)) { if (!STATE_IS_PENDING(cf_sk)) diff --git a/net/core/datagram.c b/net/core/datagram.c index 2dccd4ee591..5574a5ddf90 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) error = 0; *timeo_p = schedule_timeout(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); @@ -726,7 +726,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/core/sock.c b/net/core/sock.c index 7effa1e689d..58ebd146ce5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1395,7 +1395,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) @@ -1404,7 +1404,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; timeo = schedule_timeout(timeo); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } @@ -1570,11 +1570,11 @@ int sk_wait_data(struct sock *sk, long *timeo) int rc; DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); @@ -1798,7 +1798,7 @@ static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + wake_up_interruptible_all(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -1806,7 +1806,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_poll(sk->sk_sleep, POLLERR); + wake_up_interruptible_poll(sk_sleep(sk), POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1815,7 +1815,7 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); @@ -1830,7 +1830,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b..7b3c3f30b10 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -32,8 +32,8 @@ void sk_stream_write_space(struct sock *sk) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); @@ -66,13 +66,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -96,13 +96,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) break; } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } } @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; @@ -157,7 +157,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/dccp/output.c b/net/dccp/output.c index e98b65e9569..2d3dcb39851 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -198,7 +198,7 @@ void dccp_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -225,7 +225,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) dccp_pr_debug("delayed send by %d msec\n", delay); jiffdelay = msecs_to_jiffies(delay); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; release_sock(sk); @@ -241,7 +241,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); } while ((delay = rc) > 0); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; do_error: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a0e38d8018f..b03ecf6b2bb 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 55e3b6b0061..d6b93d19790 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CC) @@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -873,7 +873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) @@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); @@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } @@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c5376c72550..5ca7290c2e6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -548,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -561,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269d..e0a3e3537b1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) @@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) if (!timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa..77208334a61 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2a4efcea342..79986a674f6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) self->tx_flow = flow; IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n", __func__); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); break; default: IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__); @@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (flags & O_NONBLOCK) goto out; - err = wait_event_interruptible(*(sk->sk_sleep), + err = wait_event_interruptible(*(sk_sleep(sk)), skb_peek(&sk->sk_receive_queue)); if (err) goto out; @@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, goto out; err = -ERESTARTSYS; - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (sk->sk_state != TCP_SYN_SENT))) goto out; @@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Check if IrTTP is wants us to slow down */ - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { err = -ERESTARTSYS; goto out; @@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* * POSIX 1003.1g mandates this order. @@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* Wait process until data arrives */ schedule(); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; @@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); lock_kernel(); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); mask = 0; /* Exceptional events? */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c18286a2167..9636b7d27b4 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -59,7 +59,7 @@ do { \ DEFINE_WAIT(__wait); \ long __timeo = timeo; \ ret = 0; \ - prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE); \ while (!(condition)) { \ if (!__timeo) { \ ret = -EAGAIN; \ @@ -76,7 +76,7 @@ do { \ if (ret) \ break; \ } \ - finish_wait(sk->sk_sleep, &__wait); \ + finish_wait(sk_sleep(sk), &__wait); \ } while (0) #define iucv_sock_wait(sk, condition, timeo) \ @@ -307,7 +307,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + wake_up_interruptible_all(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); } @@ -795,7 +795,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* Wait for an incoming connection */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -819,7 +819,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -1269,7 +1269,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2db6a9f7591..023ba820236 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -536,7 +536,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) int rc = 0; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) break; rc = -ERESTARTSYS; @@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } @@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout) DEFINE_WAIT(wait); while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeout; } @@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) int rc; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8fb0ae61676..7ba06939829 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -802,7 +802,7 @@ static int sync_thread_backup(void *data) ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); while (!kthread_should_stop()) { - wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + wait_event_interruptible(*sk_sleep(tinfo->sock->sk), !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) || kthread_should_stop()); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa07f044b59..06cb02796a0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; } @@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 7919a9edb8e..aebfecbdb84 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -158,7 +158,7 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, unsigned int mask = 0; unsigned long flags; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (rs->rs_seen_congestion) poll_wait(file, &rds_poll_waitq, wait); diff --git a/net/rds/rds.h b/net/rds/rds.h index 4bec6e2ed49..c224b5bb3ba 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -492,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); static inline void __rds_wake_sk_sleep(struct sock *sk) { - wait_queue_head_t *waitq = sk->sk_sleep; + wait_queue_head_t *waitq = sk_sleep(sk); if (!sock_flag(sk, SOCK_DEAD) && waitq) wake_up(waitq); diff --git a/net/rds/recv.c b/net/rds/recv.c index e2a2b9344f7..795a00b7f2c 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, break; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); diff --git a/net/rds/send.c b/net/rds/send.c index 53d6795ac9d..9c1c6bcaa6c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -915,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4fb711a035f..8e45e76a95f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -845,7 +845,7 @@ rose_try_next_neigh: DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -858,7 +858,7 @@ rose_try_next_neigh: err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) @@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c060095b27c..c432d76f415 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -589,7 +589,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c1941276f6e..f34adcca8a8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5702,7 +5702,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. @@ -5943,7 +5943,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) int error; DEFINE_WAIT(wait); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); @@ -5980,14 +5980,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) sctp_lock_sock(sk); ready: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); *err = error; return error; } @@ -6061,8 +6061,8 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. @@ -6296,7 +6296,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { @@ -6322,7 +6322,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } @@ -6332,7 +6332,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; sctp_release_sock(sk); @@ -6340,7 +6340,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) sctp_lock_sock(sk); } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259204e..ce0d5b35c2a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -419,8 +419,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -436,10 +436,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } } @@ -757,8 +757,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } /* @@ -777,8 +777,8 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } static void svc_tcp_data_ready(struct sock *sk, int count) @@ -791,8 +791,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -1494,8 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cfb20b80b3a..66e889ba48f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; u32 mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_UNCONNECTED) || @@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), !tport->congested); lock_sock(sk); if (res) @@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!tport->congested || !tport->connected)); lock_sock(sk); if (res) @@ -931,7 +931,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1064,7 +1064,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) tipc_disconnect_port(tipc_sk_port(sk)); } - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); return TIPC_OK; } @@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport) { struct sock *sk = (struct sock *)tport->usr_handle; - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /** @@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ release_sock(sk); - res = wait_event_interruptible_timeout(*sk->sk_sleep, + res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), sk->sk_rcvtimeo); @@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue))); lock_sock(sk); if (res) @@ -1632,8 +1632,8 @@ restart: /* Discard any unreceived messages; wake up sleeping tasks */ discard_rx_queue(sk); - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); res = 0; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d9122e78f4..87c0360eaa2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,7 +316,7 @@ static void unix_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1736,7 +1736,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_lock(sk); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || @@ -1752,7 +1752,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } @@ -1991,7 +1991,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -2028,7 +2028,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0cb83f..6cffbc4da02 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -718,7 +718,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) DECLARE_WAITQUEUE(wait, current); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); rc = -ERESTARTSYS; @@ -738,7 +738,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } @@ -838,7 +838,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) DECLARE_WAITQUEUE(wait, current); int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,7 +858,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } -- cgit v1.2.3-70-g09d2 From c377411f2494a931ff7facdbb3a6839b1266bcf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Apr 2010 15:13:20 -0700 Subject: net: sk_add_backlog() take rmem_alloc into account Current socket backlog limit is not enough to really stop DDOS attacks, because user thread spend many time to process a full backlog each round, and user might crazy spin on socket lock. We should add backlog size and receive_queue size (aka rmem_alloc) to pace writers, and let user run without being slow down too much. Introduce a sk_rcvqueues_full() helper, to avoid taking socket lock in stress situations. Under huge stress from a multiqueue/RPS enabled NIC, a single flow udp receiver can now process ~200.000 pps (instead of ~100 pps before the patch) on a 8 core machine. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 13 +++++++++++-- net/core/sock.c | 5 ++++- net/ipv4/udp.c | 4 ++++ net/ipv6/udp.c | 8 ++++++++ net/sctp/socket.c | 3 --- 5 files changed, 27 insertions(+), 6 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index 07822280d95..cf12b1e61fa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -256,7 +256,6 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; int len; - int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* + * Take into account size of receive queue and backlog queue + */ +static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb) +{ + unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); + + return qsize + skb->truesize > sk->sk_rcvbuf; +} + /* The per-socket spinlock must be held here. */ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { - if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + if (sk_rcvqueues_full(sk, skb)) return -ENOBUFS; __sk_add_backlog(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index 58ebd146ce5..51041759517 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; + if (sk_rcvqueues_full(sk, skb)) { + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } if (nested) bh_lock_sock_nested(sk); else @@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fa3d2874db4..63eb56b2d87 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; } + + if (sk_rcvqueues_full(sk, skb)) + goto drop; + rc = 0; bh_lock_sock(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2850e35cee3..3ead20ad9d0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { + if (sk_rcvqueues_full(sk, skb)) { + kfree_skb(skb1); + goto drop; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); @@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* deliver */ + if (sk_rcvqueues_full(sk, skb)) { + sock_put(sk); + goto discard; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f34adcca8a8..13d8229f3a9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); - /* Set socket backlog limit. */ - sk->sk_backlog.limit = sysctl_sctp_rmem[1]; - local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.3-70-g09d2 From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Apr 2010 11:01:49 +0000 Subject: net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 13 +++++++++---- drivers/net/tun.c | 21 ++++++++++++--------- include/linux/net.h | 14 +++++++++----- include/net/af_unix.h | 20 +++++++++++--------- include/net/sock.h | 38 +++++++++++++++++++------------------- net/atm/common.c | 22 ++++++++++++++-------- net/core/sock.c | 50 +++++++++++++++++++++++++++++++------------------- net/core/stream.c | 10 +++++++--- net/dccp/output.c | 10 ++++++---- net/iucv/af_iucv.c | 11 +++++++---- net/phonet/pep.c | 8 ++++---- net/phonet/socket.c | 2 +- net/rxrpc/af_rxrpc.c | 10 ++++++---- net/sctp/socket.c | 2 +- net/socket.c | 47 ++++++++++++++++++++++++++++++++++++----------- net/unix/af_unix.c | 17 ++++++++--------- 16 files changed, 181 insertions(+), 114 deletions(-) (limited to 'net/core/sock.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index d97e1fd234b..1c4110df343 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -37,6 +37,7 @@ struct macvtap_queue { struct sock sk; struct socket sock; + struct socket_wq wq; struct macvlan_dev *vlan; struct file *file; unsigned int flags; @@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { static void macvtap_sock_write_space(struct sock *sk) { + wait_queue_head_t *wqueue; + if (!sock_writeable(sk) || !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file *file) @@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file) if (!q) goto out; - init_waitqueue_head(&q->sock.wait); + q->sock.wq = &q->wq; + init_waitqueue_head(&q->wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; @@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) goto out; mask = 0; - poll_wait(file, &q->sock.wait, wait); + poll_wait(file, &q->wq.wait, wait); if (!skb_queue_empty(&q->sk.sk_receive_queue)) mask |= POLLIN | POLLRDNORM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 20a17938c62..e525a6cf558 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -109,7 +109,7 @@ struct tun_struct { struct tap_filter txflt; struct socket socket; - + struct socket_wq wq; #ifdef TUN_DEBUG int debug; #endif @@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev) /* Inform the methods they need to stop using the dev. */ if (tfile) { - wake_up_all(&tun->socket.wait); + wake_up_all(&tun->wq.wait); if (atomic_dec_and_test(&tfile->count)) __tun_detach(tun); } @@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + wake_up_interruptible_poll(&tun->wq.wait, POLLIN | POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; @@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); - poll_wait(file, &tun->socket.wait, wait); + poll_wait(file, &tun->wq.wait, wait); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - add_wait_queue(&tun->socket.wait, &wait); + add_wait_queue(&tun->wq.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; @@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, } current->state = TASK_RUNNING; - remove_wait_queue(&tun->socket.wait, &wait); + remove_wait_queue(&tun->wq.wait, &wait); return ret; } @@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { static void tun_sock_write_space(struct sock *sk) { struct tun_struct *tun; + wait_queue_head_t *wqueue; if (!sock_writeable(sk)) return; @@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_sync_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; @@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!sk) goto err_free_dev; - init_waitqueue_head(&tun->socket.wait); + tun->socket.wq = &tun->wq; + init_waitqueue_head(&tun->wq.wait); tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; diff --git a/include/linux/net.h b/include/linux/net.h index 4157b5d42bd..2b4deeeb864 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -59,6 +59,7 @@ typedef enum { #include #include /* For O_CLOEXEC and O_NONBLOCK */ #include +#include struct poll_table_struct; struct pipe_inode_info; @@ -116,6 +117,12 @@ enum sock_shutdown_cmd { SHUT_RDWR = 2, }; +struct socket_wq { + wait_queue_head_t wait; + struct fasync_struct *fasync_list; + struct rcu_head rcu; +} ____cacheline_aligned_in_smp; + /** * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) @@ -135,11 +142,8 @@ struct socket { kmemcheck_bitfield_end(type); unsigned long flags; - /* - * Please keep fasync_list & wait fields in the same cache line - */ - struct fasync_struct *fasync_list; - wait_queue_head_t wait; + + struct socket_wq *wq; struct file *file; struct sock *sk; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1614d78c60e..20725e213ae 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -30,7 +30,7 @@ struct unix_skb_parms { #endif }; -#define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) #define UNIXSID(skb) (&UNIXCB((skb)).secid) @@ -45,21 +45,23 @@ struct unix_skb_parms { struct unix_sock { /* WARNING: sk has to be the first member */ struct sock sk; - struct unix_address *addr; - struct dentry *dentry; - struct vfsmount *mnt; + struct unix_address *addr; + struct dentry *dentry; + struct vfsmount *mnt; struct mutex readlock; - struct sock *peer; - struct sock *other; + struct sock *peer; + struct sock *other; struct list_head link; - atomic_long_t inflight; - spinlock_t lock; + atomic_long_t inflight; + spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; - wait_queue_head_t peer_wait; + struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) +#define peer_wait peer_wq.wait + #ifdef CONFIG_SYSCTL extern int unix_sysctl_register(struct net *net); extern void unix_sysctl_unregister(struct net *net); diff --git a/include/net/sock.h b/include/net/sock.h index e1777db5b9a..cc7f91ec972 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -159,7 +159,7 @@ struct sock_common { * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer * @sk_rcvbuf: size of receive buffer in bytes - * @sk_sleep: sock wait queue + * @sk_wq: sock wait queue and async head * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy @@ -257,7 +257,7 @@ struct sock { struct sk_buff *tail; int len; } sk_backlog; - wait_queue_head_t *sk_sleep; + struct socket_wq *sk_wq; struct dst_entry *sk_dst_cache; #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; @@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) static inline wait_queue_head_t *sk_sleep(struct sock *sk) { - return sk->sk_sleep; + return &sk->sk_wq->wait; } /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. @@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk) write_lock_bh(&sk->sk_callback_lock); sock_set_flag(sk, SOCK_DEAD); sk_set_socket(sk, NULL); - sk->sk_sleep = NULL; + sk->sk_wq = NULL; write_unlock_bh(&sk->sk_callback_lock); } static inline void sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); - sk->sk_sleep = &parent->wait; + rcu_assign_pointer(sk->sk_wq, parent->wq); parent->sk = sk; sk_set_socket(sk, parent); security_sock_graft(sk, parent); @@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk) } /** - * sk_has_sleeper - check if there are any waiting processes - * @sk: socket + * wq_has_sleeper - check if there are any waiting processes + * @sk: struct socket_wq * - * Returns true if socket has waiting processes + * Returns true if socket_wq has waiting processes * - * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths: @@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk) * ... ... * tp->rcv_nxt check sock_def_readable * ... { - * schedule ... - * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - * wake_up_interruptible(sk_sleep(sk)) + * schedule rcu_read_lock(); + * wq = rcu_dereference(sk->sk_wq); + * if (wq && waitqueue_active(&wq->wait)) + * wake_up_interruptible(&wq->wait) * ... * } * @@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk) * could then endup calling schedule and sleep forever if there are no more * data on the socket. * - * The sk_has_sleeper is always called right after a call to read_lock, so we - * can use smp_mb__after_lock barrier. */ -static inline int sk_has_sleeper(struct sock *sk) +static inline bool wq_has_sleeper(struct socket_wq *wq) { + /* * We need to be sure we are in sync with the * add_wait_queue modifications to the wait queue. * * This memory barrier is paired in the sock_poll_wait. */ - smp_mb__after_lock(); - return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); + smp_mb(); + return wq && waitqueue_active(&wq->wait); } /** @@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk) * @wait_address: socket wait queue * @p: poll_table * - * See the comments in the sk_has_sleeper function. + * See the comments in the wq_has_sleeper function. */ static inline void sock_poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) @@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp, * We need to be sure we are in sync with the * socket flags modification. * - * This memory barrier is paired in the sk_has_sleeper. + * This memory barrier is paired in the wq_has_sleeper. */ smp_mb(); } diff --git a/net/atm/common.c b/net/atm/common.c index e3e10e6f862..b43feb1a399 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up(sk_sleep(sk)); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up(&wq->wait); + rcu_read_unlock(); } static inline int vcc_writable(struct sock *sk) @@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk) static void vcc_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (vcc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static struct proto vcc_proto = { diff --git a/net/core/sock.c b/net/core/sock.c index 51041759517..94c4affdda9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); - newsk->sk_sleep = NULL; + newsk->sk_wq = NULL; if (newsk->sk_prot->sockets_allocated) percpu_counter_inc(newsk->sk_prot->sockets_allocated); @@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk_sleep(sk)); - read_unlock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); + rcu_read_unlock(); } static void sock_def_error_report(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_poll(sk_sleep(sk), POLLERR); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_readable(struct sock *sk, int len) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ @@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } static void sock_def_destruct(struct sock *sk) @@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) if (sock) { sk->sk_type = sock->type; - sk->sk_sleep = &sock->wait; + sk->sk_wq = sock->wq; sock->sk = sk; } else - sk->sk_sleep = NULL; + sk->sk_wq = NULL; spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); diff --git a/net/core/stream.c b/net/core/stream.c index 7b3c3f30b10..cc196f42b8d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -28,15 +28,19 @@ void sk_stream_write_space(struct sock *sk) { struct socket *sock = sk->sk_socket; + struct socket_wq *wq; if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); - if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); } } diff --git a/net/dccp/output.c b/net/dccp/output.c index 2d3dcb39851..aadbdb58758 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss); void dccp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /** diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9636b7d27b4..8be324fe08b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk) */ static void iucv_sock_wake_msglim(struct sock *sk) { - read_lock(&sk->sk_callback_lock); - if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk_sleep(sk)); + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* Timers */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e2a95762abd..af4d38bc3b2 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock) if (signal_pending(tsk)) return sock_intr_errno(timeo); - prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); } return 0; @@ -910,10 +910,10 @@ disabled: goto out; } - prepare_to_wait(&sk->sk_socket->wait, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); - finish_wait(&sk->sk_socket->wait, &wait); + finish_wait(sk_sleep(sk), &wait); if (sk->sk_state != TCP_ESTABLISHED) goto disabled; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c785bfd0744..6e9848bf037 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, struct pep_sock *pn = pep_sk(sk); unsigned int mask = 0; - poll_wait(file, &sock->wait, wait); + poll_wait(file, sk_sleep(sk), wait); switch (sk->sk_state) { case TCP_LISTEN: diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c432d76f415..0b9bb2085ce 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk) static void rxrpc_write_space(struct sock *sk) { _enter("%p", sk); - read_lock(&sk->sk_callback_lock); + rcu_read_lock(); if (rxrpc_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible(sk_sleep(sk)); + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + if (wq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13d8229f3a9..d54700af927 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc) * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (sock->fasync_list && + if (sock->wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/socket.c b/net/socket.c index cb7c1f6c0d6..dae8c6b84a0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); + if (!ei->socket.wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&ei->socket.wq->wait); + ei->socket.wq->fasync_list = NULL; - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } + +static void wq_free_rcu(struct rcu_head *head) +{ + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + + kfree(wq); +} + static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -513,7 +529,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) + if (sock->wq->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); - fasync_helper(fd, filp, on, &sock->fasync_list); + fasync_helper(fd, filp, on, &sock->wq->fasync_list); - if (!sock->fasync_list) + if (!sock->wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - kill_fasync(&sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - kill_fasync(&sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87c0360eaa2..fef2cc5e9d2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk) static void unix_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (unix_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk_sleep(sk)); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive @@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); skpair->sk_state_change(skpair); - read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; @@ -1142,7 +1143,7 @@ restart: newsk->sk_peercred.pid = task_tgid_vnr(current); current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); newu = unix_sk(newsk); - newsk->sk_sleep = &newu->peer_wait; + newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); /* copy address information from listening to new sock*/ @@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&other->sk_callback_lock); } if (other) sock_put(other); -- cgit v1.2.3-70-g09d2 From a465419b1febb603821f924805529cff89cafeed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 May 2010 00:36:33 -0700 Subject: net: Introduce sk_route_nocaps TCP-MD5 sessions have intermittent failures, when route cache is invalidated. ip_queue_xmit() has to find a new route, calls sk_setup_caps(sk, &rt->u.dst), destroying the sk->sk_route_caps &= ~NETIF_F_GSO_MASK that MD5 desperately try to make all over its way (from tcp_transmit_skb() for example) So we send few bad packets, and everything is fine when tcp_transmit_skb() is called again for this socket. Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a socket field, sk_route_nocaps, containing bits to mask on sk_route_caps. Reported-by: Bhaskar Dutta Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++++++ net/core/sock.c | 1 + net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 5 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index 328e03f47dd..aed16eb9db4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -177,6 +177,7 @@ struct sock_common { * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) + * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_lingertime: %SO_LINGER l_linger setting @@ -276,6 +277,7 @@ struct sock { int sk_forward_alloc; gfp_t sk_allocation; int sk_route_caps; + int sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; int sk_rcvlowat; @@ -1335,6 +1337,12 @@ static inline int sk_can_gso(const struct sock *sk) extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); +static inline void sk_nocaps_add(struct sock *sk, int flags) +{ + sk->sk_route_nocaps |= flags; + sk->sk_route_caps &= ~flags; +} + static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/core/sock.c b/net/core/sock.c index 94c4affdda9..63530a03b8c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1231,6 +1231,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; + sk->sk_route_caps &= ~sk->sk_route_nocaps; if (sk_can_gso(sk)) { if (dst->header_len) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 771f8146a2e..202cf09c4cd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -891,7 +891,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, kfree(newkey); return -ENOMEM; } - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); @@ -1021,7 +1021,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, return -EINVAL; tp->md5sig_info = p; - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); @@ -1462,7 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newkey != NULL) tcp_v4_md5_do_add(newsk, newinet->inet_daddr, newkey, key->keylen); - newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5db3a2c6cb3..18a3302480c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -873,7 +873,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, md5, sk, NULL, skb); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6603511e367..2b7c3a100e2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -604,7 +604,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, kfree(newkey); return -ENOMEM; } - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); @@ -741,7 +741,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -ENOMEM; tp->md5sig_info = p; - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; + sk_nocaps_add(sk, NETIF_F_GSO_MASK); } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 7fee226ad2397b635e2fd565a59ca3ae08a164cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 May 2010 23:19:48 +0000 Subject: net: add a noref bit on skb dst Use low order bit of skb->_skb_dst to tell dst is not refcounted. Change _skb_dst to _skb_refdst to make sure all uses are catched. skb_dst() returns the dst, regardless of noref bit set or not, but with a lockdep check to make sure a noref dst is not given if current user is not rcu protected. New skb_dst_set_noref() helper to set an notrefcounted dst on a skb. (with lockdep check) skb_dst_drop() drops a reference only if skb dst was refcounted. skb_dst_force() helper is used to force a refcount on dst, when skb is queued and not anymore RCU protected. Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if !IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in sock_queue_rcv_skb(), in __nf_queue(). Use skb_dst_force() in dev_requeue_skb(). Note: dst_use_noref() still dirties dst, we might transform it later to do one dirtying per jiffies. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 58 ++++++++++++++++++++++++++++++++++++++++++++---- include/net/dst.h | 48 ++++++++++++++++++++++++++++++++++++--- include/net/sock.h | 13 ++++++----- net/core/dev.c | 3 +++ net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++++ net/ipv4/icmp.c | 6 ++--- net/ipv4/ip_options.c | 9 ++++---- net/ipv4/netfilter.c | 6 ++--- net/ipv4/route.c | 2 +- net/netfilter/nf_queue.c | 2 ++ net/sched/sch_generic.c | 4 +++- 12 files changed, 134 insertions(+), 25 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9525bce80f..7cdfb4d5284 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t; * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header - * @_skb_dst: destination entry + * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm * @cb: Control buffer. Free for use by every layer. Put private vars here * @len: Length of actual data @@ -328,7 +328,7 @@ struct sk_buff { */ char cb[48] __aligned(8); - unsigned long _skb_dst; + unsigned long _skb_refdst; #ifdef CONFIG_XFRM struct sec_path *sp; #endif @@ -419,14 +419,64 @@ struct sk_buff { #include +/* + * skb might have a dst pointer attached, refcounted or not. + * _skb_refdst low order bit is set if refcount was _not_ taken + */ +#define SKB_DST_NOREF 1UL +#define SKB_DST_PTRMASK ~(SKB_DST_NOREF) + +/** + * skb_dst - returns skb dst_entry + * @skb: buffer + * + * Returns skb dst_entry, regardless of reference taken or not. + */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { - return (struct dst_entry *)skb->_skb_dst; + /* If refdst was not refcounted, check we still are in a + * rcu_read_lock section + */ + WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && + !rcu_read_lock_held() && + !rcu_read_lock_bh_held()); + return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +/** + * skb_dst_set - sets skb dst + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was taken on dst and should + * be released by skb_dst_drop() + */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { - skb->_skb_dst = (unsigned long)dst; + skb->_skb_refdst = (unsigned long)dst; +} + +/** + * skb_dst_set_noref - sets skb dst, without a reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst + * skb_dst_drop() should not dst_release() this dst + */ +static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +{ + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; +} + +/** + * skb_dst_is_noref - Test if skb dst isnt refcounted + * @skb: buffer + */ +static inline bool skb_dst_is_noref(const struct sk_buff *skb) +{ + return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } static inline struct rtable *skb_rtable(const struct sk_buff *skb) diff --git a/include/net/dst.h b/include/net/dst.h index aac5a5fcfda..27207a13f2a 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time) dst->lastuse = time; } +static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) +{ + dst->__use++; + dst->lastuse = time; +} + static inline struct dst_entry * dst_clone(struct dst_entry * dst) { @@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst) } extern void dst_release(struct dst_entry *dst); + +static inline void refdst_drop(unsigned long refdst) +{ + if (!(refdst & SKB_DST_NOREF)) + dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); +} + +/** + * skb_dst_drop - drops skb dst + * @skb: buffer + * + * Drops dst reference count if a reference was taken. + */ static inline void skb_dst_drop(struct sk_buff *skb) { - if (skb->_skb_dst) - dst_release(skb_dst(skb)); - skb->_skb_dst = 0UL; + if (skb->_skb_refdst) { + refdst_drop(skb->_skb_refdst); + skb->_skb_refdst = 0UL; + } +} + +static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) +{ + nskb->_skb_refdst = oskb->_skb_refdst; + if (!(nskb->_skb_refdst & SKB_DST_NOREF)) + dst_clone(skb_dst(nskb)); +} + +/** + * skb_dst_force - makes sure skb dst is refcounted + * @skb: buffer + * + * If dst is not yet refcounted, let's do it + */ +static inline void skb_dst_force(struct sk_buff *skb) +{ + if (skb_dst_is_noref(skb)) { + WARN_ON(!rcu_read_lock_held()); + skb->_skb_refdst &= ~SKB_DST_NOREF; + dst_clone(skb_dst(skb)); + } } /* Children define the path of the packet through the diff --git a/include/net/sock.h b/include/net/sock.h index aed16eb9db4..5697caf8cc7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk) /* OOB backlog add */ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_backlog.tail) { - sk->sk_backlog.head = sk->sk_backlog.tail = skb; - } else { + /* dont let skb dst not refcounted, we are going to leave rcu lock */ + skb_dst_force(skb); + + if (!sk->sk_backlog.tail) + sk->sk_backlog.head = skb; + else sk->sk_backlog.tail->next = skb; - sk->sk_backlog.tail = skb; - } + + sk->sk_backlog.tail = skb; skb->next = NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index cdcb9cbedf4..6c820650b80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2052,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * waiting to be sent out; and the qdisc is not running - * xmit the skb directly. */ + if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) + skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) __qdisc_run(q); @@ -2060,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { + skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); qdisc_run(q); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a9b0e1f7780..c543dd25243 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -520,7 +520,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; - skb_dst_set(new, dst_clone(skb_dst(old))); + skb_dst_copy(new, old); new->rxhash = old->rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); diff --git a/net/core/sock.c b/net/core/sock.c index 63530a03b8c..bf88a167c8f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ skb_len = skb->len; + /* we escape from rcu protected region, make sure we dont leak + * a norefcounted dst + */ + skb_dst_force(skb); + spin_lock_irqsave(&list->lock, flags); skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(list, skb); @@ -1536,6 +1541,7 @@ static void __release_sock(struct sock *sk) do { struct sk_buff *next = skb->next; + WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f3d339f728b..d65e9215bcd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -587,20 +587,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) err = __ip_route_output_key(net, &rt2, &fl); else { struct flowi fl2 = {}; - struct dst_entry *odst; + unsigned long orefdst; fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) goto relookup_failed; /* Ugh! */ - odst = skb_dst(skb_in); + orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); rt2 = skb_rtable(skb_in); - skb_dst_set(skb_in, odst); + skb_in->_skb_refdst = orefdst; /* restore old refdst */ } if (err) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4c09a31fd14..3244133c24f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -601,6 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned char *optptr = skb_network_header(skb) + opt->srr; struct rtable *rt = skb_rtable(skb); struct rtable *rt2; + unsigned long orefdst; int err; if (!opt->srr) @@ -624,16 +625,16 @@ int ip_options_rcv_srr(struct sk_buff *skb) } memcpy(&nexthop, &optptr[srrptr-1], 4); - rt = skb_rtable(skb); + orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { - ip_rt_put(rt2); - skb_dst_set(skb, &rt->u.dst); + skb_dst_drop(skb); + skb->_skb_refdst = orefdst; return -EINVAL; } - ip_rt_put(rt); + refdst_drop(orefdst); if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 82fb43c5c59..07de855e217 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; - struct dst_entry *odst; + unsigned long orefdst; unsigned int hh_len; unsigned int type; @@ -51,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (ip_route_output_key(net, &rt, &fl) != 0) return -1; - odst = skb_dst(skb); + orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; } dst_release(&rt->u.dst); - dst_release(odst); + refdst_drop(orefdst); } if (skb_dst(skb)->error) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dea3f926425..705eccfb476 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3033,7 +3033,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (rt_is_expired(rt)) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set_noref(skb, &rt->u.dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 0b1103c0b1f..78b3cf9c519 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "nf_internals.h" @@ -170,6 +171,7 @@ static int __nf_queue(struct sk_buff *skb, dev_hold(physoutdev); } #endif + skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a969b111bd7..a63029ef3ed 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -26,6 +26,7 @@ #include #include #include +#include /* Main transmission queue. */ @@ -40,6 +41,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { + skb_dst_force(skb); q->gso_skb = skb; q->qstats.requeues++; q->q.qlen++; /* it's still part of the queue */ @@ -179,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; - + WARN_ON_ONCE(skb_dst_is_noref(skb)); root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); -- cgit v1.2.3-70-g09d2