diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 177 |
1 files changed, 129 insertions, 48 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 73a4f9702a6..f2046e404a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -52,12 +52,13 @@ #include <linux/mm.h> #include <linux/security.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <net/dst.h> #include <net/checksum.h> @@ -105,10 +106,8 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol - * @skc_refcnt: reference count - * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_daddr: Foreign IPv4 addr + * @skc_rcv_saddr: Bound local IPv4 addr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_family: network address family @@ -119,20 +118,20 @@ struct net; * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol + * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. */ struct sock_common { - /* - * first fields are not copied in sock_copy() + /* skc_daddr and skc_rcv_saddr must be grouped : + * cf INET_MATCH() and INET_TW_MATCH() */ - union { - struct hlist_node skc_node; - struct hlist_nulls_node skc_nulls_node; - }; - atomic_t skc_refcnt; - int skc_tx_queue_mapping; + __be32 skc_daddr; + __be32 skc_rcv_saddr; union { unsigned int skc_hash; @@ -150,6 +149,22 @@ struct sock_common { #ifdef CONFIG_NET_NS struct net *skc_net; #endif + /* + * fields between dontcopy_begin/dontcopy_end + * are not copied in sock_copy() + */ + /* private: */ + int skc_dontcopy_begin[0]; + /* public: */ + union { + struct hlist_node skc_node; + struct hlist_nulls_node skc_nulls_node; + }; + int skc_tx_queue_mapping; + atomic_t skc_refcnt; + /* private: */ + int skc_dontcopy_end[0]; + /* public: */ }; /** @@ -232,7 +247,8 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping -#define sk_copy_start __sk_common.skc_hash +#define sk_dontcopy_begin __sk_common.skc_dontcopy_begin +#define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state @@ -241,59 +257,67 @@ struct sock { #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net - kmemcheck_bitfield_begin(flags); - unsigned int sk_shutdown : 2, - sk_no_check : 2, - sk_userlocks : 4, - sk_protocol : 8, - sk_type : 16; - kmemcheck_bitfield_end(flags); - int sk_rcvbuf; socket_lock_t sk_lock; + struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. + * Note : rmem_alloc is in this structure to fill a hole + * on 64bit arches, not because its logically part of + * backlog. */ struct { - struct sk_buff *head; - struct sk_buff *tail; - int len; + atomic_t rmem_alloc; + int len; + struct sk_buff *head; + struct sk_buff *tail; } sk_backlog; - struct socket_wq *sk_wq; - struct dst_entry *sk_dst_cache; +#define sk_rmem_alloc sk_backlog.rmem_alloc + int sk_forward_alloc; +#ifdef CONFIG_RPS + __u32 sk_rxhash; +#endif + atomic_t sk_drops; + int sk_rcvbuf; + + struct sk_filter __rcu *sk_filter; + struct socket_wq __rcu *sk_wq; + +#ifdef CONFIG_NET_DMA + struct sk_buff_head sk_async_wait_queue; +#endif + #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif + unsigned long sk_flags; + struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; - atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; - struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_write_queue; -#ifdef CONFIG_NET_DMA - struct sk_buff_head sk_async_wait_queue; -#endif + kmemcheck_bitfield_begin(flags); + unsigned int sk_shutdown : 2, + sk_no_check : 2, + sk_userlocks : 4, + sk_protocol : 8, + sk_type : 16; + kmemcheck_bitfield_end(flags); int sk_wmem_queued; - int sk_forward_alloc; gfp_t sk_allocation; int sk_route_caps; int sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; int sk_rcvlowat; -#ifdef CONFIG_RPS - __u32 sk_rxhash; -#endif - unsigned long sk_flags; unsigned long sk_lingertime; struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; - atomic_t sk_drops; unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; @@ -301,7 +325,6 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; long sk_sndtimeo; - struct sk_filter *sk_filter; void *sk_protinfo; struct timer_list sk_timer; ktime_t sk_stamp; @@ -509,9 +532,6 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) -#define sk_for_each_continue(__sk, node) \ - if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ - hlist_for_each_entry_continue(__sk, node, sk_node) #define sk_for_each_safe(__sk, node, tmp, list) \ hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) #define sk_for_each_bound(__sk, node, list) \ @@ -734,6 +754,8 @@ struct proto { int level, int optname, char __user *optval, int __user *option); + int (*compat_ioctl)(struct sock *sk, + unsigned int cmd, unsigned long arg); #endif int (*sendmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); @@ -754,6 +776,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@ -762,7 +785,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); - atomic_t *memory_allocated; /* Current allocated memory. */ + atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. @@ -771,7 +794,7 @@ struct proto { * is strict, actions are advisory and have some latency. */ int *memory_pressure; - int *sysctl_mem; + long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; int max_header; @@ -852,6 +875,8 @@ static inline void __sk_prot_rehash(struct sock *sk) sk->sk_prot->hash(sk); } +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) @@ -1155,6 +1180,8 @@ extern void sk_common_release(struct sock *sk); /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); +extern void sk_filter_release_rcu(struct rcu_head *rcu); + /** * sk_filter_release - release a socket filter * @fp: filter to remove @@ -1165,7 +1192,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); static inline void sk_filter_release(struct sk_filter *fp) { if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu(&fp->rcu, sk_filter_release_rcu); } static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) @@ -1240,7 +1267,8 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) static inline wait_queue_head_t *sk_sleep(struct sock *sk) { - return &sk->sk_wq->wait; + BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0); + return &rcu_dereference_raw(sk->sk_wq)->wait; } /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. @@ -1261,7 +1289,7 @@ static inline void sock_orphan(struct sock *sk) static inline void sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); - rcu_assign_pointer(sk->sk_wq, parent->wq); + sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); security_sock_graft(sk, parent); @@ -1362,6 +1390,59 @@ static inline void sk_nocaps_add(struct sock *sk, int flags) sk->sk_route_caps &= ~flags; } +static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, char *to, + int copy, int offset) +{ + if (skb->ip_summed == CHECKSUM_NONE) { + int err = 0; + __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); + if (err) + return err; + skb->csum = csum_block_add(skb->csum, csum, offset); + } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { + if (!access_ok(VERIFY_READ, from, copy) || + __copy_from_user_nocache(to, from, copy)) + return -EFAULT; + } else if (copy_from_user(to, from, copy)) + return -EFAULT; + + return 0; +} + +static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, int copy) +{ + int err, offset = skb->len; + + err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy), + copy, offset); + if (err) + __skb_trim(skb, offset); + + return err; +} + +static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, + struct sk_buff *skb, + struct page *page, + int off, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off, + copy, skb->len); + if (err) + return err; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + sk_mem_charge(sk, copy); + return 0; +} + static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) @@ -1722,7 +1803,7 @@ void sock_net_set(struct sock *sk, struct net *net) /* * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. - * They should not hold a referrence to a namespace in order to allow + * They should not hold a reference to a namespace in order to allow * to stop it. * Sockets after sk_change_net should be released using sk_release_kernel */ |