Diffstat (limited to 'include/net/sock.h')
 -rw-r--r--  include/net/sock.h | 193
 1 file changed, 176 insertions, 17 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index eefeeaf7fc4..950409dcec3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
+#include <linux/poll.h>
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -103,15 +104,15 @@ struct net;
 
 /**
  *	struct sock_common - minimal network layer representation of sockets
+ *	@skc_node: main hash linkage for various protocol lookup tables
+ *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
+ *	@skc_refcnt: reference count
+ *	@skc_hash: hash value used with various protocol lookup tables
  *	@skc_family: network address family
  *	@skc_state: Connection state
  *	@skc_reuse: %SO_REUSEADDR setting
  *	@skc_bound_dev_if: bound device index if != 0
- *	@skc_node: main hash linkage for various protocol lookup tables
- *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
- *	@skc_refcnt: reference count
- *	@skc_hash: hash value used with various protocol lookup tables
  *	@skc_prot: protocol handlers inside a network family
  *	@skc_net: reference to the network namespace of this socket
  *
@@ -119,17 +120,21 @@ struct net;
  *	for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-	unsigned short		skc_family;
-	volatile unsigned char	skc_state;
-	unsigned char		skc_reuse;
-	int			skc_bound_dev_if;
+	/*
+	 * first fields are not copied in sock_copy()
+	 */
 	union {
 		struct hlist_node	skc_node;
 		struct hlist_nulls_node skc_nulls_node;
 	};
-	struct hlist_node	skc_bind_node;
 	atomic_t		skc_refcnt;
-	unsigned int		skc_hash;
+
+	unsigned int		skc_hash;
+	unsigned short		skc_family;
+	volatile unsigned char	skc_state;
+	unsigned char		skc_reuse;
+	int			skc_bound_dev_if;
+	struct hlist_node	skc_bind_node;
 	struct proto		*skc_prot;
 #ifdef CONFIG_NET_NS
 	struct net		*skc_net;
@@ -158,7 +163,7 @@ struct sock_common {
  *	@sk_allocation: allocation mode
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
- *		   %SO_OOBINLINE settings
+ *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -207,20 +212,24 @@ struct sock {
 	 * don't add nothing before this first member (__sk_common) --acme
 	 */
 	struct sock_common	__sk_common;
+#define sk_node			__sk_common.skc_node
+#define sk_nulls_node		__sk_common.skc_nulls_node
+#define sk_refcnt		__sk_common.skc_refcnt
+
+#define sk_copy_start		__sk_common.skc_hash
+#define sk_hash			__sk_common.skc_hash
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
-#define sk_node			__sk_common.skc_node
-#define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_bind_node		__sk_common.skc_bind_node
-#define sk_refcnt		__sk_common.skc_refcnt
-#define sk_hash			__sk_common.skc_hash
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+	kmemcheck_bitfield_begin(flags);
 	unsigned char		sk_shutdown : 2,
 				sk_no_check : 2,
 				sk_userlocks : 4;
+	kmemcheck_bitfield_end(flags);
 	unsigned char		sk_protocol;
 	unsigned short		sk_type;
 	int			sk_rcvbuf;
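
The reordering above groups the fields that must survive a socket clone (hash-table linkage and reference count) at the front of struct sock_common, and the new sk_copy_start alias marks the first byte that is safe to bulk-copy. The real sock_copy() lives in net/core/sock.c, outside this diff; the following is an illustrative sketch of the idea, not the verbatim function:

	/*
	 * Illustrative sketch only: copy everything from sk_copy_start
	 * (i.e. skc_hash) onward, leaving the new socket's hash linkage
	 * and refcount untouched.
	 */
	static void sock_copy(struct sock *nsk, const struct sock *osk)
	{
		memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
		       osk->sk_prot->obj_size -
		       offsetof(struct sock, sk_copy_start));
	}

The kmemcheck_bitfield_begin(flags)/kmemcheck_bitfield_end(flags) markers only annotate the sk_shutdown/sk_no_check/sk_userlocks bitfield for the kmemcheck checker so partial initialization of that byte does not raise false warnings; they do not change the layout.
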
@@ -488,6 +497,13 @@ enum sock_flags {
 	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+	SOCK_TIMESTAMPING_TX_HARDWARE,  /* %SOF_TIMESTAMPING_TX_HARDWARE */
+	SOCK_TIMESTAMPING_TX_SOFTWARE,  /* %SOF_TIMESTAMPING_TX_SOFTWARE */
+	SOCK_TIMESTAMPING_RX_HARDWARE,  /* %SOF_TIMESTAMPING_RX_HARDWARE */
+	SOCK_TIMESTAMPING_RX_SOFTWARE,  /* %SOF_TIMESTAMPING_RX_SOFTWARE */
+	SOCK_TIMESTAMPING_SOFTWARE,     /* %SOF_TIMESTAMPING_SOFTWARE */
+	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
+	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -944,6 +960,11 @@ extern struct sk_buff		*sock_alloc_send_skb(struct sock *sk,
 						     unsigned long size,
 						     int noblock,
 						     int *errcode);
+extern struct sk_buff		*sock_alloc_send_pskb(struct sock *sk,
+						      unsigned long header_len,
+						      unsigned long data_len,
+						      int noblock,
+						      int *errcode);
 extern void *sock_kmalloc(struct sock *sk, int size,
 			  gfp_t priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
@@ -1194,6 +1215,107 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 	return 0;
 }
 
+/**
+ * sk_wmem_alloc_get - returns write allocations
+ * @sk: socket
+ *
+ * Returns sk_wmem_alloc minus initial offset of one
+ */
+static inline int sk_wmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_wmem_alloc) - 1;
+}
+
+/**
+ * sk_rmem_alloc_get - returns read allocations
+ * @sk: socket
+ *
+ * Returns sk_rmem_alloc
+ */
+static inline int sk_rmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_rmem_alloc);
+}
+
+/**
+ * sk_has_allocations - check if allocations are outstanding
+ * @sk: socket
+ *
+ * Returns true if socket has write or read allocations
+ */
+static inline int sk_has_allocations(const struct sock *sk)
+{
+	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
+}
+
+/**
+ * sk_has_sleeper - check if there are any waiting processes
+ * @sk: socket
+ *
+ * Returns true if socket has waiting processes
+ *
+ * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * barrier call. They were added due to the race found within the tcp code.
+ *
+ * Consider following tcp code paths:
+ *
+ * CPU1                    CPU2
+ *
+ * sys_select              receive packet
+ *   ...                   ...
+ *   __add_wait_queue      update tp->rcv_nxt
+ *   ...                   ...
+ *   tp->rcv_nxt check     sock_def_readable
+ *   ...                   {
+ *   schedule               ...
+ *                          if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ *                              wake_up_interruptible(sk->sk_sleep)
+ *                          ...
+ *                         }
+ *
+ * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay
+ * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1
+ * could then endup calling schedule and sleep forever if there are no more
+ * data on the socket.
+ *
+ * The sk_has_sleeper is always called right after a call to read_lock, so we
+ * can use smp_mb__after_lock barrier.
+ */
+static inline int sk_has_sleeper(struct sock *sk)
+{
+	/*
+	 * We need to be sure we are in sync with the
+	 * add_wait_queue modifications to the wait queue.
+	 *
+	 * This memory barrier is paired in the sock_poll_wait.
+	 */
+	smp_mb__after_lock();
+	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+}
+
+/**
+ * sock_poll_wait - place memory barrier behind the poll_wait call.
+ * @filp:          file
+ * @wait_address:  socket wait queue
+ * @p:             poll_table
+ *
+ * See the comments in the sk_has_sleeper function.
+ */
+static inline void sock_poll_wait(struct file *filp,
+		wait_queue_head_t *wait_address, poll_table *p)
+{
+	if (p && wait_address) {
+		poll_wait(filp, wait_address, p);
+		/*
+		 * We need to be sure we are in sync with the
+		 * socket flags modification.
+		 *
+		 * This memory barrier is paired in the sk_has_sleeper.
+		 */
+		smp_mb();
+	}
+}
+
 /*
  *	Queue a received datagram if it will fit. Stream and sequenced
  *	protocols can't normally use this as they need to fit buffers in
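
The intended pairing: the poll side publishes its wait-queue entry and then checks socket state, while the wakeup side publishes new data and then checks for sleepers, with a full barrier on each side so neither check can be reordered before its preceding write. A minimal sketch modeled on datagram_poll() and sock_def_readable(); the my_proto_* names are hypothetical:

	/* Poll side: sock_poll_wait() issues smp_mb() after poll_wait(),
	 * so the receive-queue check below cannot be hoisted above the
	 * __add_wait_queue() done inside poll_wait(). */
	static unsigned int my_proto_poll(struct file *file,
					  struct socket *sock,
					  poll_table *wait)
	{
		struct sock *sk = sock->sk;
		unsigned int mask = 0;

		sock_poll_wait(file, sk->sk_sleep, wait);
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= POLLIN | POLLRDNORM;
		return mask;
	}

	/* Wakeup side: sk_has_sleeper() issues smp_mb__after_lock(), so
	 * the waitqueue_active() check cannot be reordered before the
	 * queueing of the packet that made the socket readable. */
	static void my_proto_data_ready(struct sock *sk, int len)
	{
		read_lock(&sk->sk_callback_lock);
		if (sk_has_sleeper(sk))
			wake_up_interruptible(sk->sk_sleep);
		read_unlock(&sk->sk_callback_lock);
	}
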
@@ -1205,14 +1327,20 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
-	sock_hold(sk);
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
+	/*
+	 * We used to take a refcount on sk, but following operation
+	 * is enough to guarantee sk_free() wont free this sock until
+	 * all in-flight packets are completed
+	 */
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 }
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
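
Dropping sock_hold() works because sk_wmem_alloc is initialized to one (hence the "- 1" in sk_wmem_alloc_get() above): the atomic_add() of skb->truesize keeps the counter above zero while packets are in flight, and the write destructor releases the socket once the last byte is accounted back. Roughly, the matching destructor-side logic in net/core/sock.c looks like this sketch:

	/* Sketch of the destructor side: when the counter returns to
	 * zero, sk_free() has already run and dropped the initial offset
	 * of one, so the last in-flight packet does the real free. */
	void sock_wfree(struct sk_buff *skb)
	{
		struct sock *sk = skb->sk;
		int res;

		res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
		if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
			sk->sk_write_space(sk);
		if (res == 0)
			__sk_free(sk);
	}
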
@@ -1340,14 +1468,45 @@ static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
 	ktime_t kt = skb->tstamp;
+	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
 
-	if (sock_flag(sk, SOCK_RCVTSTAMP))
+	/*
+	 * generate control messages if
+	 * - receive time stamping in software requested (SOCK_RCVTSTAMP
+	 *   or SOCK_TIMESTAMPING_RX_SOFTWARE)
+	 * - software time stamp available and wanted
+	 *   (SOCK_TIMESTAMPING_SOFTWARE)
+	 * - hardware time stamps available and wanted
+	 *   (SOCK_TIMESTAMPING_SYS_HARDWARE or
+	 *   SOCK_TIMESTAMPING_RAW_HARDWARE)
+	 */
+	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
+	    sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
+	    (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
+	    (hwtstamps->hwtstamp.tv64 &&
+	     sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
+	    (hwtstamps->syststamp.tv64 &&
+	     sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
 		__sock_recv_timestamp(msg, sk, skb);
 	else
 		sk->sk_stamp = kt;
 }
 
 /**
+ * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
+ * @msg:	outgoing packet
+ * @sk:		socket sending this packet
+ * @shtx:	filled with instructions for time stamping
+ *
+ * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if
+ * parameters are invalid.
+ */
+extern int sock_tx_timestamp(struct msghdr *msg,
+			     struct sock *sk,
+			     union skb_shared_tx *shtx);
+
+
+/**
  * sk_eat_skb - Release a skb if it is no longer needed
  * @sk: socket to eat this skb from
  * @skb: socket buffer to eat
@@ -1415,7 +1574,7 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb)
 	return NULL;
 }
 
-extern void sock_enable_timestamp(struct sock *sk);
+extern void sock_enable_timestamp(struct sock *sk, int flag);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
 extern int sock_get_timestampns(struct sock *, struct timespec __user *);
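
The SOCK_TIMESTAMPING_* flags mirror the SOF_TIMESTAMPING_* bits that userspace sets through the SO_TIMESTAMPING socket option (linux/net_tstamp.h); when the condition in sock_recv_timestamp() fires, the timestamps are delivered as an SCM_TIMESTAMPING control message carrying three struct timespec values (software, hardware mapped to system time, raw hardware), matching the three "available and wanted" cases above. A minimal userspace sketch; enable_rx_timestamps() is a hypothetical helper, error handling is elided, and it assumes kernel headers that define SO_TIMESTAMPING:

	#include <sys/socket.h>
	#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* */

	/* Request software receive timestamps; read them back later from
	 * an SCM_TIMESTAMPING cmsg returned by recvmsg(). */
	static int enable_rx_timestamps(int fd)
	{
		int val = SOF_TIMESTAMPING_RX_SOFTWARE |
			  SOF_TIMESTAMPING_SOFTWARE;

		return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
				  &val, sizeof(val));
	}
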