summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-04-29 11:01:49 +0000
committerDavid S. Miller <davem@davemloft.net>2010-05-01 15:00:15 -0700
commit43815482370c510c569fd18edb57afcb0fa8cab6 (patch)
tree063efaae3758402b84f056438b704d1de68f7837 /include
parent83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff)
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/net.h14
-rw-r--r--include/net/af_unix.h20
-rw-r--r--include/net/sock.h38
3 files changed, 39 insertions, 33 deletions
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd..2b4deeeb864 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
+#include <linux/rcupdate.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
SHUT_RDWR = 2,
};
+struct socket_wq {
+ wait_queue_head_t wait;
+ struct fasync_struct *fasync_list;
+ struct rcu_head rcu;
+} ____cacheline_aligned_in_smp;
+
/**
* struct socket - general BSD socket
* @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
kmemcheck_bitfield_end(type);
unsigned long flags;
- /*
- * Please keep fasync_list & wait fields in the same cache line
- */
- struct fasync_struct *fasync_list;
- wait_queue_head_t wait;
+
+ struct socket_wq *wq;
struct file *file;
struct sock *sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60e..20725e213ae 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
#endif
};
-#define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb))
+#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
#define UNIXCREDS(skb) (&UNIXCB((skb)).creds)
#define UNIXSID(skb) (&UNIXCB((skb)).secid)
@@ -45,21 +45,23 @@ struct unix_skb_parms {
struct unix_sock {
/* WARNING: sk has to be the first member */
struct sock sk;
- struct unix_address *addr;
- struct dentry *dentry;
- struct vfsmount *mnt;
+ struct unix_address *addr;
+ struct dentry *dentry;
+ struct vfsmount *mnt;
struct mutex readlock;
- struct sock *peer;
- struct sock *other;
+ struct sock *peer;
+ struct sock *other;
struct list_head link;
- atomic_long_t inflight;
- spinlock_t lock;
+ atomic_long_t inflight;
+ spinlock_t lock;
unsigned int gc_candidate : 1;
unsigned int gc_maybe_cycle : 1;
- wait_queue_head_t peer_wait;
+ struct socket_wq peer_wq;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)
+#define peer_wait peer_wq.wait
+
#ifdef CONFIG_SYSCTL
extern int unix_sysctl_register(struct net *net);
extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9a..cc7f91ec972 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
* @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock: synchronizer
* @sk_rcvbuf: size of receive buffer in bytes
- * @sk_sleep: sock wait queue
+ * @sk_wq: sock wait queue and async head
* @sk_dst_cache: destination cache
* @sk_dst_lock: destination cache lock
* @sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
struct sk_buff *tail;
int len;
} sk_backlog;
- wait_queue_head_t *sk_sleep;
+ struct socket_wq *sk_wq;
struct dst_entry *sk_dst_cache;
#ifdef CONFIG_XFRM
struct xfrm_policy *sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
{
- return sk->sk_sleep;
+ return &sk->sk_wq->wait;
}
/* Detach socket from process context.
* Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
write_lock_bh(&sk->sk_callback_lock);
sock_set_flag(sk, SOCK_DEAD);
sk_set_socket(sk, NULL);
- sk->sk_sleep = NULL;
+ sk->sk_wq = NULL;
write_unlock_bh(&sk->sk_callback_lock);
}
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
write_lock_bh(&sk->sk_callback_lock);
- sk->sk_sleep = &parent->wait;
+ rcu_assign_pointer(sk->sk_wq, parent->wq);
parent->sk = sk;
sk_set_socket(sk, parent);
security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
}
/**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @sk: struct socket_wq
*
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
*
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
*
* Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
* ... ...
* tp->rcv_nxt check sock_def_readable
* ... {
- * schedule ...
- * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
- * wake_up_interruptible(sk_sleep(sk))
+ * schedule rcu_read_lock();
+ * wq = rcu_dereference(sk->sk_wq);
+ * if (wq && waitqueue_active(&wq->wait))
+ * wake_up_interruptible(&wq->wait)
* ...
* }
*
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
* could then endup calling schedule and sleep forever if there are no more
* data on the socket.
*
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
*/
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
{
+
/*
* We need to be sure we are in sync with the
* add_wait_queue modifications to the wait queue.
*
* This memory barrier is paired in the sock_poll_wait.
*/
- smp_mb__after_lock();
- return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
+ smp_mb();
+ return wq && waitqueue_active(&wq->wait);
}
/**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
* @wait_address: socket wait queue
* @p: poll_table
*
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
*/
static inline void sock_poll_wait(struct file *filp,
wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
* We need to be sure we are in sync with the
* socket flags modification.
*
- * This memory barrier is paired in the sk_has_sleeper.
+ * This memory barrier is paired in the wq_has_sleeper.
*/
smp_mb();
}