summaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h177
1 files changed, 129 insertions, 48 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 73a4f9702a6..f2046e404a6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -52,12 +52,13 @@
#include <linux/mm.h>
#include <linux/security.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>
@@ -105,10 +106,8 @@ struct net;
/**
* struct sock_common - minimal network layer representation of sockets
- * @skc_node: main hash linkage for various protocol lookup tables
- * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
- * @skc_refcnt: reference count
- * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_daddr: Foreign IPv4 addr
+ * @skc_rcv_saddr: Bound local IPv4 addr
* @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_family: network address family
@@ -119,20 +118,20 @@ struct net;
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
* @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket
+ * @skc_node: main hash linkage for various protocol lookup tables
+ * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
+ * @skc_tx_queue_mapping: tx queue number for this connection
+ * @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
* for struct sock and struct inet_timewait_sock.
*/
struct sock_common {
- /*
- * first fields are not copied in sock_copy()
+ /* skc_daddr and skc_rcv_saddr must be grouped :
+ * cf INET_MATCH() and INET_TW_MATCH()
*/
- union {
- struct hlist_node skc_node;
- struct hlist_nulls_node skc_nulls_node;
- };
- atomic_t skc_refcnt;
- int skc_tx_queue_mapping;
+ __be32 skc_daddr;
+ __be32 skc_rcv_saddr;
union {
unsigned int skc_hash;
@@ -150,6 +149,22 @@ struct sock_common {
#ifdef CONFIG_NET_NS
struct net *skc_net;
#endif
+ /*
+ * fields between dontcopy_begin/dontcopy_end
+ * are not copied in sock_copy()
+ */
+ /* private: */
+ int skc_dontcopy_begin[0];
+ /* public: */
+ union {
+ struct hlist_node skc_node;
+ struct hlist_nulls_node skc_nulls_node;
+ };
+ int skc_tx_queue_mapping;
+ atomic_t skc_refcnt;
+ /* private: */
+ int skc_dontcopy_end[0];
+ /* public: */
};
/**
@@ -232,7 +247,8 @@ struct sock {
#define sk_refcnt __sk_common.skc_refcnt
#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
-#define sk_copy_start __sk_common.skc_hash
+#define sk_dontcopy_begin __sk_common.skc_dontcopy_begin
+#define sk_dontcopy_end __sk_common.skc_dontcopy_end
#define sk_hash __sk_common.skc_hash
#define sk_family __sk_common.skc_family
#define sk_state __sk_common.skc_state
@@ -241,59 +257,67 @@ struct sock {
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
#define sk_net __sk_common.skc_net
- kmemcheck_bitfield_begin(flags);
- unsigned int sk_shutdown : 2,
- sk_no_check : 2,
- sk_userlocks : 4,
- sk_protocol : 8,
- sk_type : 16;
- kmemcheck_bitfield_end(flags);
- int sk_rcvbuf;
socket_lock_t sk_lock;
+ struct sk_buff_head sk_receive_queue;
/*
* The backlog queue is special, it is always used with
* the per-socket spinlock held and requires low latency
* access. Therefore we special case it's implementation.
+ * Note : rmem_alloc is in this structure to fill a hole
+ * on 64bit arches, not because its logically part of
+ * backlog.
*/
struct {
- struct sk_buff *head;
- struct sk_buff *tail;
- int len;
+ atomic_t rmem_alloc;
+ int len;
+ struct sk_buff *head;
+ struct sk_buff *tail;
} sk_backlog;
- struct socket_wq *sk_wq;
- struct dst_entry *sk_dst_cache;
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+ int sk_forward_alloc;
+#ifdef CONFIG_RPS
+ __u32 sk_rxhash;
+#endif
+ atomic_t sk_drops;
+ int sk_rcvbuf;
+
+ struct sk_filter __rcu *sk_filter;
+ struct socket_wq __rcu *sk_wq;
+
+#ifdef CONFIG_NET_DMA
+ struct sk_buff_head sk_async_wait_queue;
+#endif
+
#ifdef CONFIG_XFRM
struct xfrm_policy *sk_policy[2];
#endif
+ unsigned long sk_flags;
+ struct dst_entry *sk_dst_cache;
spinlock_t sk_dst_lock;
- atomic_t sk_rmem_alloc;
atomic_t sk_wmem_alloc;
atomic_t sk_omem_alloc;
int sk_sndbuf;
- struct sk_buff_head sk_receive_queue;
struct sk_buff_head sk_write_queue;
-#ifdef CONFIG_NET_DMA
- struct sk_buff_head sk_async_wait_queue;
-#endif
+ kmemcheck_bitfield_begin(flags);
+ unsigned int sk_shutdown : 2,
+ sk_no_check : 2,
+ sk_userlocks : 4,
+ sk_protocol : 8,
+ sk_type : 16;
+ kmemcheck_bitfield_end(flags);
int sk_wmem_queued;
- int sk_forward_alloc;
gfp_t sk_allocation;
int sk_route_caps;
int sk_route_nocaps;
int sk_gso_type;
unsigned int sk_gso_max_size;
int sk_rcvlowat;
-#ifdef CONFIG_RPS
- __u32 sk_rxhash;
-#endif
- unsigned long sk_flags;
unsigned long sk_lingertime;
struct sk_buff_head sk_error_queue;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
int sk_err,
sk_err_soft;
- atomic_t sk_drops;
unsigned short sk_ack_backlog;
unsigned short sk_max_ack_backlog;
__u32 sk_priority;
@@ -301,7 +325,6 @@ struct sock {
const struct cred *sk_peer_cred;
long sk_rcvtimeo;
long sk_sndtimeo;
- struct sk_filter *sk_filter;
void *sk_protinfo;
struct timer_list sk_timer;
ktime_t sk_stamp;
@@ -509,9 +532,6 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
#define sk_nulls_for_each_from(__sk, node) \
if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
-#define sk_for_each_continue(__sk, node) \
- if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
- hlist_for_each_entry_continue(__sk, node, sk_node)
#define sk_for_each_safe(__sk, node, tmp, list) \
hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node)
#define sk_for_each_bound(__sk, node, list) \
@@ -734,6 +754,8 @@ struct proto {
int level,
int optname, char __user *optval,
int __user *option);
+ int (*compat_ioctl)(struct sock *sk,
+ unsigned int cmd, unsigned long arg);
#endif
int (*sendmsg)(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len);
@@ -754,6 +776,7 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+ void (*clear_sk)(struct sock *sk, int size);
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
@@ -762,7 +785,7 @@ struct proto {
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
- atomic_t *memory_allocated; /* Current allocated memory. */
+ atomic_long_t *memory_allocated; /* Current allocated memory. */
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
/*
* Pressure flag: try to collapse.
@@ -771,7 +794,7 @@ struct proto {
* is strict, actions are advisory and have some latency.
*/
int *memory_pressure;
- int *sysctl_mem;
+ long *sysctl_mem;
int *sysctl_wmem;
int *sysctl_rmem;
int max_header;
@@ -852,6 +875,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
sk->sk_prot->hash(sk);
}
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
+
/* About 10 seconds */
#define SOCK_DESTROY_TIME (10*HZ)
@@ -1155,6 +1180,8 @@ extern void sk_common_release(struct sock *sk);
/* Initialise core socket variables */
extern void sock_init_data(struct socket *sock, struct sock *sk);
+extern void sk_filter_release_rcu(struct rcu_head *rcu);
+
/**
* sk_filter_release - release a socket filter
* @fp: filter to remove
@@ -1165,7 +1192,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
static inline void sk_filter_release(struct sk_filter *fp)
{
if (atomic_dec_and_test(&fp->refcnt))
- kfree(fp);
+ call_rcu(&fp->rcu, sk_filter_release_rcu);
}
static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
@@ -1240,7 +1267,8 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
{
- return &sk->sk_wq->wait;
+ BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0);
+ return &rcu_dereference_raw(sk->sk_wq)->wait;
}
/* Detach socket from process context.
* Announce socket dead, detach it from wait queue and inode.
@@ -1261,7 +1289,7 @@ static inline void sock_orphan(struct sock *sk)
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
write_lock_bh(&sk->sk_callback_lock);
- rcu_assign_pointer(sk->sk_wq, parent->wq);
+ sk->sk_wq = parent->wq;
parent->sk = sk;
sk_set_socket(sk, parent);
security_sock_graft(sk, parent);
@@ -1362,6 +1390,59 @@ static inline void sk_nocaps_add(struct sock *sk, int flags)
sk->sk_route_caps &= ~flags;
}
+static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
+ char __user *from, char *to,
+ int copy, int offset)
+{
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ int err = 0;
+ __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
+ if (err)
+ return err;
+ skb->csum = csum_block_add(skb->csum, csum, offset);
+ } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
+ if (!access_ok(VERIFY_READ, from, copy) ||
+ __copy_from_user_nocache(to, from, copy))
+ return -EFAULT;
+ } else if (copy_from_user(to, from, copy))
+ return -EFAULT;
+
+ return 0;
+}
+
+static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
+ char __user *from, int copy)
+{
+ int err, offset = skb->len;
+
+ err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy),
+ copy, offset);
+ if (err)
+ __skb_trim(skb, offset);
+
+ return err;
+}
+
+static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
+ struct sk_buff *skb,
+ struct page *page,
+ int off, int copy)
+{
+ int err;
+
+ err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off,
+ copy, skb->len);
+ if (err)
+ return err;
+
+ skb->len += copy;
+ skb->data_len += copy;
+ skb->truesize += copy;
+ sk->sk_wmem_queued += copy;
+ sk_mem_charge(sk, copy);
+ return 0;
+}
+
static inline int skb_copy_to_page(struct sock *sk, char __user *from,
struct sk_buff *skb, struct page *page,
int off, int copy)
@@ -1722,7 +1803,7 @@ void sock_net_set(struct sock *sk, struct net *net)
/*
* Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
- * They should not hold a referrence to a namespace in order to allow
+ * They should not hold a reference to a namespace in order to allow
* to stop it.
* Sockets after sk_change_net should be released using sk_release_kernel
*/