diff options
Diffstat (limited to 'include/net')
40 files changed, 693 insertions, 316 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b5d785ab4a0..bfc1779fc75 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -13,7 +13,7 @@ extern void unix_gc(void); #define UNIX_HASH_SIZE 256 extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -extern rwlock_t unix_table_lock; +extern spinlock_t unix_table_lock; extern atomic_t unix_tot_inflight; @@ -58,10 +58,10 @@ struct unix_skb_parms { #define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) -#define unix_state_rlock(s) read_lock(&unix_sk(s)->lock) -#define unix_state_runlock(s) read_unlock(&unix_sk(s)->lock) -#define unix_state_wlock(s) write_lock(&unix_sk(s)->lock) -#define unix_state_wunlock(s) write_unlock(&unix_sk(s)->lock) +#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) +#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) #ifdef __KERNEL__ /* The AF_UNIX socket */ @@ -76,7 +76,7 @@ struct unix_sock { struct sock *other; struct sock *gc_tree; atomic_t inflight; - rwlock_t lock; + spinlock_t lock; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/include/net/atmclip.h b/include/net/atmclip.h index 47048b1d179..90fcc98e676 100644 --- a/include/net/atmclip.h +++ b/include/net/atmclip.h @@ -7,7 +7,6 @@ #define _ATMCLIP_H #include <linux/netdevice.h> -#include <linux/skbuff.h> #include <linux/atm.h> #include <linux/atmdev.h> #include <linux/atmarp.h> @@ -18,6 +17,7 @@ #define CLIP_VCC(vcc) ((struct clip_vcc *) ((vcc)->user_back)) #define NEIGH2ENTRY(neigh) ((struct atmarp_entry *) (neigh)->primary_key) +struct sk_buff; struct clip_vcc { struct atm_vcc *vcc; /* VCC descriptor */ diff --git a/include/net/dst.h b/include/net/dst.h index 6c196a5baf2..bee8b84d329 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -9,6 +9,7 @@ #define _NET_DST_H #include <linux/config.h> +#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> #include <linux/jiffies.h> diff --git a/include/net/flow.h b/include/net/flow.h index 9a5c94b1a0e..ec7eb86eb20 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,11 +84,12 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +struct sock; +typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, + flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 52d8b1a73d5..c5b96b2b815 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -60,7 +60,7 @@ struct genl_info */ struct genl_ops { - unsigned int cmd; + u8 cmd; unsigned int flags; struct nla_policy *policy; int (*doit)(struct sk_buff *skb, diff --git a/include/net/icmp.h b/include/net/icmp.h index 6cdebeee5f9..e7c3f20fbaf 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -20,12 +20,9 @@ #include <linux/config.h> #include <linux/icmp.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/protocol.h> +#include <net/inet_sock.h> #include <net/snmp.h> -#include <linux/ip.h> struct icmp_err { int errno; @@ -38,6 +35,10 @@ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics); #define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field) #define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field) +struct dst_entry; +struct net_proto_family; +struct sk_buff; + extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h index 225fc751d46..03b766afdc3 100644 --- a/include/net/ieee80211_crypt.h +++ b/include/net/ieee80211_crypt.h @@ -23,12 +23,17 @@ #ifndef IEEE80211_CRYPT_H #define IEEE80211_CRYPT_H -#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/list.h> +#include <asm/atomic.h> enum { IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), }; +struct sk_buff; +struct module; + struct ieee80211_crypto_ops { const char *name; struct list_head list; @@ -87,6 +92,8 @@ struct ieee80211_crypt_data { atomic_t refcnt; }; +struct ieee80211_device; + int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index e97a9accb71..eb8afe3499a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -24,6 +24,7 @@ #define IF_RA_MANAGED 0x40 #define IF_RA_RCVD 0x20 #define IF_RS_SENT 0x10 +#define IF_READY 0x80000000 /* prefix flags */ #define IF_PREFIX_ONLINK 0x01 @@ -82,6 +83,7 @@ struct ipv6_mc_socklist struct in6_addr addr; int ifindex; struct ipv6_mc_socklist *next; + rwlock_t sflock; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip6_sf_socklist *sflist; }; diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h new file mode 100644 index 00000000000..b33b438bffc --- /dev/null +++ b/include/net/inet6_connection_sock.h @@ -0,0 +1,42 @@ +/* + * NET Generic infrastructure for INET6 connection oriented protocols. + * + * Authors: Many people, see the TCPv6 sources + * + * From code originally in TCPv6 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET6_CONNECTION_SOCK_H +#define _INET6_CONNECTION_SOCK_H + +#include <linux/types.h> + +struct in6_addr; +struct inet_bind_bucket; +struct request_sock; +struct sk_buff; +struct sock; +struct sockaddr; + +extern int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb); + +extern struct request_sock *inet6_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const struct in6_addr *raddr, + const struct in6_addr *laddr, + const int iif); + +extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned long timeout); + +extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); + +extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok); +#endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 5a2beed5a77..25f708ff020 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -48,6 +48,32 @@ static inline int inet6_sk_ehashfn(const struct sock *sk) return inet6_ehashfn(laddr, lport, faddr, fport); } +static inline void __inet6_hash(struct inet_hashinfo *hashinfo, + struct sock *sk) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + + if (sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (hashinfo->ehash_size - 1); + list = &hashinfo->ehash[hash].chain; + lock = &hashinfo->ehash[hash].lock; + write_lock(lock); + } + + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); +} + /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM @@ -84,10 +110,10 @@ static inline struct sock * if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } diff --git a/include/net/inet_common.h b/include/net/inet_common.h index f943306ce5f..227adcbdfec 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -1,8 +1,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H -extern struct proto_ops inet_stream_ops; -extern struct proto_ops inet_dgram_ops; +extern const struct proto_ops inet_stream_ops; +extern const struct proto_ops inet_dgram_ops; /* * INET4 prototypes used by INET6 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b0c99060b78..50234fa56a6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -15,9 +15,11 @@ #ifndef _INET_CONNECTION_SOCK_H #define _INET_CONNECTION_SOCK_H -#include <linux/ip.h> +#include <linux/compiler.h> #include <linux/string.h> #include <linux/timer.h> + +#include <net/inet_sock.h> #include <net/request_sock.h> #define INET_CSK_DEBUG 1 @@ -29,6 +31,29 @@ struct inet_bind_bucket; struct inet_hashinfo; struct tcp_congestion_ops; +/* + * Pointers to address related TCP functions + * (i.e. things that depend on the address family) + */ +struct inet_connection_sock_af_ops { + int (*queue_xmit)(struct sk_buff *skb, int ipfragok); + void (*send_check)(struct sock *sk, int len, + struct sk_buff *skb); + int (*rebuild_header)(struct sock *sk); + int (*conn_request)(struct sock *sk, struct sk_buff *skb); + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); + int (*remember_stamp)(struct sock *sk); + __u16 net_header_len; + int (*setsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); + void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); + int sockaddr_len; +}; + /** inet_connection_sock - INET connection oriented sock * * @icsk_accept_queue: FIFO of established children @@ -36,13 +61,16 @@ struct tcp_congestion_ops; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook + * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries * @icsk_probes_out: unanswered 0 window probes + * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -54,14 +82,17 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_pmtu_cookie; struct tcp_congestion_ops *icsk_ca_ops; + struct inet_connection_sock_af_ops *icsk_af_ops; + unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; - /* 2 BYTES HOLE, TRY TO PACK! */ + __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -192,8 +223,12 @@ extern struct request_sock *inet_csk_search_req(const struct sock *sk, const __u16 rport, const __u32 raddr, const __u32 laddr); +extern int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb); extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum); + struct sock *sk, unsigned short snum, + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)); extern struct dst_entry* inet_csk_route_req(struct sock *sk, const struct request_sock *req); @@ -207,7 +242,7 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - const unsigned timeout); + unsigned long timeout); static inline void inet_csk_reqsk_queue_removed(struct sock *sk, struct request_sock *req) @@ -273,4 +308,6 @@ static inline unsigned int inet_csk_listen_poll(const struct sock *sk) extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); +extern void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index b0c47e2eccf..d599c6bfbb8 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -3,6 +3,8 @@ #include <linux/ip.h> #include <linux/skbuff.h> + +#include <net/inet_sock.h> #include <net/dsfield.h> enum { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 07840baa934..135d80fd658 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include <linux/wait.h> #include <net/inet_connection_sock.h> +#include <net/inet_sock.h> #include <net/route.h> #include <net/sock.h> #include <net/tcp_states.h> @@ -128,26 +129,6 @@ struct inet_hashinfo { kmem_cache_t *bind_bucket_cachep; }; -static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport) -{ - unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); - h ^= h >> 16; - h ^= h >> 8; - return h; -} - -static inline int inet_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const __u32 laddr = inet->rcv_saddr; - const __u16 lport = inet->num; - const __u32 faddr = inet->daddr; - const __u16 fport = inet->dport; - - return inet_ehashfn(laddr, lport, faddr, fport); -} - static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -434,4 +415,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, return sk; } + +extern int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h new file mode 100644 index 00000000000..883eb529ef8 --- /dev/null +++ b/include/net/inet_sock.h @@ -0,0 +1,193 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for inet_sock + * + * Authors: Many, reorganised here by + * Arnaldo Carvalho de Melo <acme@mandriva.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_SOCK_H +#define _INET_SOCK_H + +#include <linux/config.h> + +#include <linux/string.h> +#include <linux/types.h> + +#include <net/flow.h> +#include <net/sock.h> +#include <net/request_sock.h> + +/** struct ip_options - IP Options + * + * @faddr - Saved first hop address + * @is_setbyuser - Set by setsockopt? + * @is_data - Options in __data, rather than skb + * @is_strictroute - Strict source route + * @srr_is_hit - Packet destination addr was our one + * @is_changed - IP checksum more not valid + * @rr_needaddr - Need to record addr of outgoing dev + * @ts_needtime - Need to record timestamp + * @ts_needaddr - Need to record addr of outgoing dev + */ +struct ip_options { + __u32 faddr; + unsigned char optlen; + unsigned char srr; + unsigned char rr; + unsigned char ts; + unsigned char is_setbyuser:1, + is_data:1, + is_strictroute:1, + srr_is_hit:1, + is_changed:1, + rr_needaddr:1, + ts_needtime:1, + ts_needaddr:1; + unsigned char router_alert; + unsigned char __pad1; + unsigned char __pad2; + unsigned char __data[0]; +}; + +#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) + +struct inet_request_sock { + struct request_sock req; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u16 inet6_rsk_offset; + /* 2 bytes hole, try to pack */ +#endif + u32 loc_addr; + u32 rmt_addr; + u16 rmt_port; + u16 snd_wscale : 4, + rcv_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1, + ecn_ok : 1, + acked : 1; + struct ip_options *opt; +}; + +static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) +{ + return (struct inet_request_sock *)sk; +} + +struct ip_mc_socklist; +struct ipv6_pinfo; +struct rtable; + +/** struct inet_sock - representation of INET sockets + * + * @sk - ancestor class + * @pinet6 - pointer to IPv6 control block + * @daddr - Foreign IPv4 addr + * @rcv_saddr - Bound local IPv4 addr + * @dport - Destination port + * @num - Local port + * @saddr - Sending source + * @uc_ttl - Unicast TTL + * @sport - Source port + * @id - ID counter for DF pkts + * @tos - TOS + * @mc_ttl - Multicasting TTL + * @is_icsk - is this an inet_connection_sock? + * @mc_index - Multicast device index + * @mc_list - Group array + * @cork - info to build ip hdr on each ip frag while socket is corked + */ +struct inet_sock { + /* sk and pinet6 has to be the first two members of inet_sock */ + struct sock sk; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct ipv6_pinfo *pinet6; +#endif + /* Socket demultiplex comparisons on incoming packets. */ + __u32 daddr; + __u32 rcv_saddr; + __u16 dport; + __u16 num; + __u32 saddr; + __s16 uc_ttl; + __u16 cmsg_flags; + struct ip_options *opt; + __u16 sport; + __u16 id; + __u8 tos; + __u8 mc_ttl; + __u8 pmtudisc; + __u8 recverr:1, + is_icsk:1, + freebind:1, + hdrincl:1, + mc_loop:1; + int mc_index; + __u32 mc_addr; + struct ip_mc_socklist *mc_list; + struct { + unsigned int flags; + unsigned int fragsize; + struct ip_options *opt; + struct rtable *rt; + int length; /* Total length of all frames */ + u32 addr; + struct flowi fl; + } cork; +}; + +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +static inline void __inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from, + const int ancestor_size) +{ + memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, + sk_from->sk_prot->obj_size - ancestor_size); +} +#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) +static inline void inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); +} +#endif + +extern int inet_sk_rebuild_header(struct sock *sk); + +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) +{ + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h; +} + +static inline int inet_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport); +} + +#endif /* _INET_SOCK_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 28f7b210350..1da294c4752 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -17,15 +17,16 @@ #include <linux/config.h> -#include <linux/ip.h> #include <linux/list.h> #include <linux/module.h> #include <linux/timer.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <net/inet_sock.h> #include <net/sock.h> #include <net/tcp_states.h> +#include <net/timewait_sock.h> #include <asm/atomic.h> @@ -127,7 +128,8 @@ struct inet_timewait_sock { __u16 tw_num; /* And these are ours. */ __u8 tw_ipv6only:1; - /* 31 bits hole, try to pack */ + /* 15 bits hole, try to pack */ + __u16 tw_ipv6_offset; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; @@ -199,7 +201,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - kmem_cache_free(tw->tw_prot->twsk_slab, tw); + kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } } diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 7fda471002b..0965515f40c 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -25,6 +25,7 @@ struct inet_peer __u32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ + atomic_t rid; /* Frag reception counter */ __u32 tcp_ts; unsigned long tcp_ts_stamp; }; diff --git a/include/net/ip.h b/include/net/ip.h index e4563bbee6e..7bb5804847f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,14 +24,10 @@ #include <linux/config.h> #include <linux/types.h> -#include <linux/socket.h> #include <linux/ip.h> #include <linux/in.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/in_route.h> -#include <net/route.h> -#include <net/arp.h> + +#include <net/inet_sock.h> #include <net/snmp.h> struct sock; @@ -45,6 +41,7 @@ struct inet_skb_parm #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 #define IPSKB_XFRM_TUNNEL_SIZE 8 +#define IPSKB_FRAG_COMPLETE 16 }; struct ipcm_cookie @@ -74,6 +71,13 @@ extern rwlock_t ip_ra_lock; #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ +struct msghdr; +struct net_device; +struct packet_type; +struct rtable; +struct sk_buff; +struct sockaddr; + extern void ip_mc_dropsocket(struct sock *); extern void ip_mc_dropdevice(struct net_device *dev); extern int igmp_mc_proc_init(void); @@ -168,6 +172,7 @@ extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_low_thresh; extern int sysctl_ipfrag_time; extern int sysctl_ipfrag_secret_interval; +extern int sysctl_ipfrag_max_dist; /* From inetpeer.c */ extern int inet_peer_threshold; @@ -182,6 +187,8 @@ extern int sysctl_ip_dynaddr; extern void ipfrag_init(void); #ifdef CONFIG_INET +#include <net/dst.h> + /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ static inline @@ -310,7 +317,6 @@ enum ip_defrag_users IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, IP_DEFRAG_CONNTRACK_OUT, - IP_DEFRAG_NAT_OUT, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14de4ebd121..e000fa2cd5f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -238,6 +238,8 @@ extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, struct net_device *dev, u32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); +struct rtentry; + /* Exported by fib_semantics.c */ extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3b5559a023a..7d2674fde19 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -251,16 +251,15 @@ struct ip_vs_daemon_user { #include <linux/config.h> #include <linux/list.h> /* for struct list_head */ #include <linux/spinlock.h> /* for struct rwlock_t */ -#include <linux/skbuff.h> /* for struct sk_buff */ -#include <linux/ip.h> /* for struct iphdr */ #include <asm/atomic.h> /* for struct atomic_t */ -#include <linux/netdevice.h> /* for struct neighbour */ -#include <net/dst.h> /* for struct dst_entry */ -#include <net/udp.h> #include <linux/compiler.h> +#include <linux/timer.h> +#include <net/checksum.h> #ifdef CONFIG_IP_VS_DEBUG +#include <linux/net.h> + extern int ip_vs_get_debug_level(void); #define IP_VS_DBG(level, msg...) \ do { \ @@ -429,8 +428,11 @@ struct ip_vs_stats spinlock_t lock; /* spin lock */ }; +struct dst_entry; +struct iphdr; struct ip_vs_conn; struct ip_vs_app; +struct sk_buff; struct ip_vs_protocol { struct ip_vs_protocol *next; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0a2ad51cff8..860bbac4c4e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -240,6 +240,8 @@ extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_t struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); +extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); + extern int ip6_frag_nqueues; extern atomic_t ip6_frag_mem; @@ -525,6 +527,9 @@ extern int inet6_getname(struct socket *sock, struct sockaddr *uaddr, extern int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); + /* * reassembly.c */ @@ -533,8 +538,11 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -extern struct proto_ops inet6_stream_ops; -extern struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_stream_ops; +extern const struct proto_ops inet6_dgram_ops; + +struct group_source_req; +struct group_filter; extern int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index f85d6e4b744..bbac87eeb42 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,11 +35,20 @@ enum { #ifdef __KERNEL__ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include <linux/config.h> +#include <linux/compiler.h> #include <linux/icmpv6.h> +#include <linux/in6.h> +#include <linux/types.h> + #include <net/neighbour.h> -#include <asm/atomic.h> + +struct ctl_table; +struct file; +struct inet6_dev; +struct net_device; +struct net_proto_family; +struct sk_buff; extern struct neigh_table nd_tbl; @@ -108,7 +117,7 @@ extern int igmp6_event_report(struct sk_buff *skb); extern void igmp6_cleanup(void); #ifdef CONFIG_SYSCTL -extern int ndisc_ifinfo_sysctl_change(ctl_table *ctl, +extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 34c07731933..6fa9ae19074 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -49,8 +49,8 @@ #ifdef __KERNEL__ #include <asm/atomic.h> -#include <linux/skbuff.h> #include <linux/netdevice.h> +#include <linux/skbuff.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc482561079..64b82b74a65 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -94,6 +94,9 @@ struct nf_conn /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper. if any */ struct nf_conntrack_helper *helper; @@ -140,6 +143,9 @@ struct nf_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + /* Flags */ unsigned int flags; @@ -190,6 +196,31 @@ static inline void nf_ct_put(struct nf_conn *ct) nf_conntrack_put(&ct->ct_general); } +extern struct nf_conntrack_tuple_hash * +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +extern void nf_conntrack_hash_insert(struct nf_conn *ct); + +extern struct nf_conntrack_expect * +__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); + +extern struct nf_conntrack_expect * +nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); + +extern void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); + +extern void nf_ct_remove_expectations(struct nf_conn *ct); + +extern void nf_conntrack_flush(void); + +extern struct nf_conntrack_helper * +nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple); +extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); + +extern struct nf_conntrack_helper * +__nf_conntrack_helper_find_byname(const char *name); + /* call to create an explicit dependency on nf_conntrack. */ extern void need_nf_conntrack(void); diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5a66b2a3a62..86ec8174ad0 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -33,6 +33,8 @@ struct nf_conntrack_helper unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct nf_conn *ct); }; extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 01663e5b33d..67856eb93b4 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -14,6 +14,8 @@ #include <linux/seq_file.h> #include <net/netfilter/nf_conntrack.h> +struct nfattr; + struct nf_conntrack_l3proto { /* Next pointer. */ @@ -70,6 +72,12 @@ struct nf_conntrack_l3proto u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct nf_conntrack_tuple *t); + + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; @@ -81,11 +89,16 @@ extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); static inline struct nf_conntrack_l3proto * -nf_ct_find_l3proto(u_int16_t l3proto) +__nf_ct_l3proto_find(u_int16_t l3proto) { return nf_ct_l3protos[l3proto]; } +extern struct nf_conntrack_l3proto * +nf_ct_l3proto_find_get(u_int16_t l3proto); + +extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); + /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; diff --git a/include/net/netfilter/nf_conntrack_protocol.h b/include/net/netfilter/nf_conntrack_protocol.h index b3afda35397..1f33737fcea 100644 --- a/include/net/netfilter/nf_conntrack_protocol.h +++ b/include/net/netfilter/nf_conntrack_protocol.h @@ -12,6 +12,7 @@ #include <net/netfilter/nf_conntrack.h> struct seq_file; +struct nfattr; struct nf_conntrack_protocol { @@ -66,6 +67,18 @@ struct nf_conntrack_protocol enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum); + /* convert protoinfo to nfnetink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct nf_conn *ct); + + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct nf_conn *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct nf_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; @@ -80,12 +93,23 @@ extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; extern struct nf_conntrack_protocol * -nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol); +__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol); + +extern struct nf_conntrack_protocol * +nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol); + +extern void nf_ct_proto_put(struct nf_conntrack_protocol *p); /* Protocol registration. */ extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto); extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto); +/* Generic netlink helpers */ +extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple); +extern int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Log invalid packets */ extern unsigned int nf_ct_log_invalid; diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h index bd08964b72c..b225d8472b7 100644 --- a/include/net/pkt_act.h +++ b/include/net/pkt_act.h @@ -15,7 +15,6 @@ #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> diff --git a/include/net/protocol.h b/include/net/protocol.h index 357691f6a45..63f7db99c2a 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -65,7 +65,7 @@ struct inet_protosw { int protocol; /* This is the L4 protocol number. */ struct proto *prot; - struct proto_ops *ops; + const struct proto_ops *ops; int capability; /* Which (if any) capability do * we need to use this socket @@ -76,6 +76,7 @@ struct inet_protosw { }; #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ +#define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff --git a/include/net/raw.h b/include/net/raw.h index f47917469b1..e67b28a0248 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -19,6 +19,8 @@ #include <linux/config.h> +#include <net/protocol.h> + extern struct proto raw_prot; extern void raw_err(struct sock *, struct sk_buff *, u32 info); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b52cc52ffe3..11641c9384f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -244,7 +244,7 @@ static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash, struct request_sock *req, - unsigned timeout) + unsigned long timeout) { struct listen_sock *lopt = queue->listen_opt; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8e7794ee27f..f5c22d77fea 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -277,6 +277,24 @@ struct sctp_sock { __u32 default_context; __u32 default_timetolive; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new associations. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new associations. */ + __u16 pathmaxrxt; + + /* The initial Path MTU to use for new associations. */ + __u32 pathmtu; + + /* The default SACK delay timeout for new associations. */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + struct sctp_initmsg initmsg; struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; @@ -845,9 +863,6 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; - /* PMTU : The current known path MTU. */ - __u32 pmtu; - /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -862,7 +877,22 @@ struct sctp_transport { /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ - int hb_interval; + __u32 hbinterval; + + /* This is the max_retrans value for the transport and will + * be initialized from the assocs value. This can be changed + * using SCTP_SET_PEER_ADDR_PARAMS socket option. + */ + __u16 pathmaxrxt; + + /* PMTU : The current known path MTU. */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; /* When was the last time (in jiffies) that we heard from this * transport? We use this to pick new active and retran paths. @@ -882,22 +912,11 @@ struct sctp_transport { */ int state; - /* hb_allowed : The current heartbeat state of this destination, - * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. - */ - int hb_allowed; - /* These are the error stats for this destination. */ /* Error count : The current error count for this destination. */ unsigned short error_count; - /* This is the max_retrans value for the transport and will - * be initialized to proto.max_retrans.path. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. - */ - int max_retrans; - /* Per : A timer used by each destination. * Destination : * Timer : @@ -1502,6 +1521,28 @@ struct sctp_association { /* The largest timeout or RTO value to use in attempting an INIT */ __u16 max_init_timeo; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new transports. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new transports in the + * association. + */ + __u16 pathmaxrxt; + + /* Association : The smallest PMTU discovered for all of the + * PMTU : peer's transport addresses. + */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + int timeouts[SCTP_NUM_TIMEOUT_TYPES]; struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES]; @@ -1571,11 +1612,6 @@ struct sctp_association { */ wait_queue_head_t wait; - /* Association : The smallest PMTU discovered for all of the - * PMTU : peer's transport addresses. - */ - __u32 pmtu; - /* The message size at which SCTP fragmentation will occur. */ __u32 frag_point; diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index f1c3bc54526..8a6bef6f91e 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -93,6 +93,8 @@ enum sctp_optname { #define SCTP_STATUS SCTP_STATUS SCTP_GET_PEER_ADDR_INFO, #define SCTP_GET_PEER_ADDR_INFO SCTP_GET_PEER_ADDR_INFO + SCTP_DELAYED_ACK_TIME, +#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -503,13 +505,41 @@ struct sctp_setadaption { * unreachable. The following structure is used to access and modify an * address's parameters: */ +enum sctp_spp_flags { + SPP_HB_ENABLE = 1, /*Enable heartbeats*/ + SPP_HB_DISABLE = 2, /*Disable heartbeats*/ + SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, + SPP_HB_DEMAND = 4, /*Send heartbeat immediately*/ + SPP_PMTUD_ENABLE = 8, /*Enable PMTU discovery*/ + SPP_PMTUD_DISABLE = 16, /*Disable PMTU discovery*/ + SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, + SPP_SACKDELAY_ENABLE = 32, /*Enable SACK*/ + SPP_SACKDELAY_DISABLE = 64, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, +}; + struct sctp_paddrparams { sctp_assoc_t spp_assoc_id; struct sockaddr_storage spp_address; __u32 spp_hbinterval; __u16 spp_pathmaxrxt; + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; } __attribute__((packed, aligned(4))); +/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This options will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoints default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get effects the specified association. + */ +struct sctp_assoc_value { + sctp_assoc_t assoc_id; + uint32_t assoc_value; +}; + /* * 7.2.2 Peer Address Information * diff --git a/include/net/sock.h b/include/net/sock.h index 982b4ecd187..6961700ff3a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,6 +493,7 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +struct timewait_sock_ops; /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface @@ -557,11 +558,10 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; - kmem_cache_t *twsk_slab; - unsigned int twsk_obj_size; atomic_t *orphan_count; struct request_sock_ops *rsk_prot; + struct timewait_sock_ops *twsk_prot; struct module *owner; @@ -926,6 +926,29 @@ static inline void sock_put(struct sock *sk) sk_free(sk); } +static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb) +{ + int rc = NET_RX_SUCCESS; + + if (sk_filter(sk, skb, 0)) + goto discard_and_relse; + + skb->dev = NULL; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = sk->sk_backlog_rcv(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); +out: + sock_put(sk); + return rc; +discard_and_relse: + kfree_skb(skb); + goto out; +} + /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, @@ -1166,7 +1189,10 @@ static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) static inline int sock_error(struct sock *sk) { - int err = xchg(&sk->sk_err, 0); + int err; + if (likely(!sk->sk_err)) + return 0; + err = xchg(&sk->sk_err, 0); return -err; } diff --git a/include/net/tcp.h b/include/net/tcp.h index d78025f9fbe..77f21c65bbc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -225,53 +225,6 @@ extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; /* - * Pointers to address related TCP functions - * (i.e. things that depend on the address family) - */ - -struct tcp_func { - int (*queue_xmit) (struct sk_buff *skb, - int ipfragok); - - void (*send_check) (struct sock *sk, - struct tcphdr *th, - int len, - struct sk_buff *skb); - - int (*rebuild_header) (struct sock *sk); - - int (*conn_request) (struct sock *sk, - struct sk_buff *skb); - - struct sock * (*syn_recv_sock) (struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); - - int (*remember_stamp) (struct sock *sk); - - __u16 net_header_len; - - int (*setsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int optlen); - - int (*getsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int __user *optlen); - - - void (*addr2sockaddr) (struct sock *sk, - struct sockaddr *); - - int sockaddr_len; -}; - -/* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). */ @@ -334,6 +287,9 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); +extern int tcp_twsk_unique(struct sock *sk, + struct sock *sktw, void *twp); + static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { @@ -405,8 +361,7 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern void tcp_v4_send_check(struct sock *sk, - struct tcphdr *th, int len, +extern void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int tcp_v4_conn_request(struct sock *sk, @@ -490,34 +445,16 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -/* Initialize RCV_MSS value. - * RCV_MSS is an our guess about MSS used by the peer. - * We haven't any direct information about the MSS. - * It's better to underestimate the RCV_MSS rather than overestimate. - * Overestimations make us ACKing less frequently than needed. - * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). - */ +extern void tcp_initialize_rcv_mss(struct sock *sk); -static inline void tcp_initialize_rcv_mss(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); - - hint = min(hint, tp->rcv_wnd/2); - hint = min(hint, TCP_MIN_RCVMSS); - hint = max(hint, TCP_MIN_MSS); - - inet_csk(sk)->icsk_ack.rcv_mss = hint; -} - -static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); } -static __inline__ void tcp_fast_path_on(struct tcp_sock *tp) +static inline void tcp_fast_path_on(struct tcp_sock *tp) { __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); } @@ -535,7 +472,7 @@ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) * Rcv_nxt can be after the window if our peer push more data * than the offered window. */ -static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp) +static inline u32 tcp_receive_window(const struct tcp_sock *tp) { s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; @@ -707,6 +644,7 @@ extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); extern int tcp_set_congestion_control(struct sock *sk, const char *name); +extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); @@ -746,7 +684,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) * "Packets left network, but not honestly ACKed yet" PLUS * "Packets fast retransmitted" */ -static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) +static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { return (tp->packets_out - tp->left_out + tp->retrans_out); } @@ -766,33 +704,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) (tp->snd_cwnd >> 2))); } -/* - * Linear increase during slow start - */ -static inline void tcp_slow_start(struct tcp_sock *tp) -{ - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; - - /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } - tp->bytes_acked = 0; - - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; -} - - static inline void tcp_sync_left_out(struct tcp_sock *tp) { if (tp->rx_opt.sack_ok && @@ -801,34 +712,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -/* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - tp->undo_marker = 0; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); -} - -static inline void tcp_enter_cwr(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tp->prior_ssthresh = 0; - tp->bytes_acked = 0; - if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - } -} - +extern void tcp_enter_cwr(struct sock *sk); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that @@ -860,14 +744,14 @@ static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) return left <= tcp_max_burst(tp); } -static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, - const struct sk_buff *skb) +static inline void tcp_minshall_update(struct tcp_sock *tp, int mss, + const struct sk_buff *skb) { if (skb->len < mss) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) @@ -875,18 +759,18 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t icsk->icsk_rto, TCP_RTO_MAX); } -static __inline__ void tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp) +static inline void tcp_push_pending_frames(struct sock *sk, + struct tcp_sock *tp) { __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } -static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } -static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } @@ -894,19 +778,19 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) /* * Calculate(/check) TCP checksum */ -static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, - unsigned long saddr, unsigned long daddr, - unsigned long base) +static inline u16 tcp_v4_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr, + unsigned long base) { return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } -static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) +static inline int __tcp_checksum_complete(struct sk_buff *skb) { return __skb_checksum_complete(skb); } -static __inline__ int tcp_checksum_complete(struct sk_buff *skb) +static inline int tcp_checksum_complete(struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete(skb); @@ -914,7 +798,7 @@ static __inline__ int tcp_checksum_complete(struct sk_buff *skb) /* Prequeue for VJ style copy to user, combined with checksumming. */ -static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) +static inline void tcp_prequeue_init(struct tcp_sock *tp) { tp->ucopy.task = NULL; tp->ucopy.len = 0; @@ -930,7 +814,7 @@ static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) * * NOTE: is this not too big to inline? */ -static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) +static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -971,7 +855,7 @@ static const char *statename[]={ }; #endif -static __inline__ void tcp_set_state(struct sock *sk, int state) +static inline void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; @@ -1005,7 +889,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) #endif } -static __inline__ void tcp_done(struct sock *sk) +static inline void tcp_done(struct sock *sk) { tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); @@ -1018,81 +902,13 @@ static __inline__ void tcp_done(struct sock *sk) inet_csk_destroy_sock(sk); } -static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) +static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; rx_opt->eff_sacks = 0; rx_opt->num_sacks = 0; } -static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp) -{ - if (tp->rx_opt.tstamp_ok) { - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); - *ptr++ = htonl(tp->rx_opt.ts_recent); - } - if (tp->rx_opt.eff_sacks) { - struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; - int this_sack; - - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + - (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK))); - for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { - *ptr++ = htonl(sp[this_sack].start_seq); - *ptr++ = htonl(sp[this_sack].end_seq); - } - if (tp->rx_opt.dsack) { - tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; - } - } -} - -/* Construct a tcp options header for a SYN or SYN_ACK packet. - * If this is every changed make sure to change the definition of - * MAX_SYN_SIZE to match the new maximum number of options that you - * can generate. - */ -static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, - int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) -{ - /* We always get an MSS option. - * The option bytes which will be seen in normal data - * packets should timestamps be used, must be in the MSS - * advertised. But we subtract them from tp->mss_cache so - * that calculations in tcp_sendmsg are simpler etc. - * So account for this fact here if necessary. If we - * don't do this correctly, as a receiver we won't - * recognize data packets as being full sized when we - * should, and thus we won't abide by the delayed ACK - * rules correctly. - * SACKs don't matter, we never delay an ACK when we - * have any of those going out. - */ - *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - if (ts) { - if(sack) - *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - else - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); /* TSVAL */ - *ptr++ = htonl(ts_recent); /* TSECR */ - } else if(sack) - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); - if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); -} - /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, @@ -1117,9 +933,9 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static __inline__ void tcp_openreq_init(struct request_sock *req, - struct tcp_options_received *rx_opt, - struct sk_buff *skb) +static inline void tcp_openreq_init(struct request_sock *req, + struct tcp_options_received *rx_opt, + struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index b9d4176b2d1..b0b645988bd 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -31,4 +31,20 @@ enum { #define TCP_STATE_MASK 0xF +#define TCP_ACTION_FIN (1 << 7) + +enum { + TCPF_ESTABLISHED = (1 << 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + #endif /* _LINUX_TCP_STATES_H */ diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h new file mode 100644 index 00000000000..2544281e1d5 --- /dev/null +++ b/include/net/timewait_sock.h @@ -0,0 +1,31 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _TIMEWAIT_SOCK_H +#define _TIMEWAIT_SOCK_H + +#include <linux/slab.h> +#include <net/sock.h> + +struct timewait_sock_ops { + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + int (*twsk_unique)(struct sock *sk, + struct sock *sktw, void *twp); +}; + +static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +{ + if (sk->sk_prot->twsk_prot->twsk_unique != NULL) + return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp); + return 0; +} + +#endif /* _TIMEWAIT_SOCK_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 4e86f2de663..61f724c1036 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -44,7 +44,7 @@ extern int datagram_send_ctl(struct msghdr *msg, /* * address family specific functions */ -extern struct tcp_func ipv4_specific; +extern struct inet_connection_sock_af_ops ipv4_specific; extern int inet6_destroy_sock(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index 107b9d791a1..766fba1369c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -22,9 +22,8 @@ #ifndef _UDP_H #define _UDP_H -#include <linux/udp.h> -#include <linux/ip.h> #include <linux/list.h> +#include <net/inet_sock.h> #include <net/sock.h> #include <net/snmp.h> #include <linux/seq_file.h> @@ -62,6 +61,7 @@ static inline int udp_lport_inuse(u16 num) extern struct proto udp_prot; +struct sk_buff; extern void udp_err(struct sk_buff *, u32); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1cdb8791213..07d7b50cdd7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2,11 +2,12 @@ #define _NET_XFRM_H #include <linux/compiler.h> +#include <linux/in.h> #include <linux/xfrm.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> +#include <linux/socket.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> #include <linux/in6.h> @@ -144,6 +145,9 @@ struct xfrm_state * transformer. */ struct xfrm_type *type; + /* Security context */ + struct xfrm_sec_ctx *security; + /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; @@ -298,6 +302,7 @@ struct xfrm_policy __u8 flags; __u8 dead; __u8 xfrm_nr; + struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -510,6 +515,25 @@ xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } +#ifdef CONFIG_SECURITY_NETWORK_XFRM +/* If neither has a context --> match + * Otherwise, both must have a context and the sids, doi, alg must match + */ +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return ((!s1 && !s2) || + (s1 && s2 && + (s1->ctx_sid == s2->ctx_sid) && + (s1->ctx_doi == s2->ctx_doi) && + (s1->ctx_alg == s2->ctx_alg))); +} +#else +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return 1; +} +#endif + /* A struct encoding bundle of transformations to apply to some set of flow. * * dst->child points to the next element of bundle. @@ -878,8 +902,8 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, - int delete); +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); void xfrm_policy_flush(void); u32 xfrm_get_acqseq(void); |