diff options
Diffstat (limited to 'include/net')
26 files changed, 2674 insertions, 103 deletions
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e42d728b162..911ceb5cd26 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -57,8 +57,6 @@ #define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg) #define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg) -extern struct proc_dir_entry *proc_bt; - /* Connection and socket states */ enum { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ @@ -177,4 +175,6 @@ extern int hci_sock_cleanup(void); extern int bt_sysfs_init(void); extern void bt_sysfs_cleanup(void); +extern struct class bt_class; + #endif /* __BLUETOOTH_H */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fa2d12b0579..b06a2d2f63d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -184,10 +184,10 @@ enum { struct hci_rp_read_loc_version { __u8 status; __u8 hci_ver; - __u16 hci_rev; + __le16 hci_rev; __u8 lmp_ver; - __u16 manufacturer; - __u16 lmp_subver; + __le16 manufacturer; + __le16 lmp_subver; } __attribute__ ((packed)); #define OCF_READ_LOCAL_FEATURES 0x0003 @@ -199,10 +199,10 @@ struct hci_rp_read_loc_features { #define OCF_READ_BUFFER_SIZE 0x0005 struct hci_rp_read_buffer_size { __u8 status; - __u16 acl_mtu; + __le16 acl_mtu; __u8 sco_mtu; - __u16 acl_max_pkt; - __u16 sco_max_pkt; + __le16 acl_max_pkt; + __le16 sco_max_pkt; } __attribute__ ((packed)); #define OCF_READ_BD_ADDR 0x0009 @@ -267,21 +267,21 @@ struct hci_cp_write_dev_class { #define OCF_READ_VOICE_SETTING 0x0025 struct hci_rp_read_voice_setting { - __u8 status; - __u16 voice_setting; + __u8 status; + __le16 voice_setting; } __attribute__ ((packed)); #define OCF_WRITE_VOICE_SETTING 0x0026 struct hci_cp_write_voice_setting { - __u16 voice_setting; + __le16 voice_setting; } __attribute__ ((packed)); #define OCF_HOST_BUFFER_SIZE 0x0033 struct hci_cp_host_buffer_size { - __u16 acl_mtu; + __le16 acl_mtu; __u8 sco_mtu; - __u16 acl_max_pkt; - __u16 sco_max_pkt; + __le16 acl_max_pkt; + __le16 sco_max_pkt; } __attribute__ ((packed)); /* Link Control */ @@ -289,10 +289,10 @@ struct hci_cp_host_buffer_size { #define OCF_CREATE_CONN 0x0005 struct hci_cp_create_conn { bdaddr_t bdaddr; - __u16 pkt_type; + __le16 pkt_type; __u8 pscan_rep_mode; __u8 pscan_mode; - __u16 clock_offset; + __le16 clock_offset; __u8 role_switch; } __attribute__ ((packed)); @@ -310,14 +310,14 @@ struct hci_cp_reject_conn_req { #define OCF_DISCONNECT 0x0006 struct hci_cp_disconnect { - __u16 handle; + __le16 handle; __u8 reason; } __attribute__ ((packed)); #define OCF_ADD_SCO 0x0007 struct hci_cp_add_sco { - __u16 handle; - __u16 pkt_type; + __le16 handle; + __le16 pkt_type; } __attribute__ ((packed)); #define OCF_INQUIRY 0x0001 @@ -354,56 +354,56 @@ struct hci_cp_pin_code_neg_reply { #define OCF_CHANGE_CONN_PTYPE 0x000F struct hci_cp_change_conn_ptype { - __u16 handle; - __u16 pkt_type; + __le16 handle; + __le16 pkt_type; } __attribute__ ((packed)); #define OCF_AUTH_REQUESTED 0x0011 struct hci_cp_auth_requested { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); #define OCF_SET_CONN_ENCRYPT 0x0013 struct hci_cp_set_conn_encrypt { - __u16 handle; + __le16 handle; __u8 encrypt; } __attribute__ ((packed)); #define OCF_CHANGE_CONN_LINK_KEY 0x0015 struct hci_cp_change_conn_link_key { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); #define OCF_READ_REMOTE_FEATURES 0x001B struct hci_cp_read_rmt_features { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); #define OCF_READ_REMOTE_VERSION 0x001D struct hci_cp_read_rmt_version { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); /* Link Policy */ #define OGF_LINK_POLICY 0x02 #define OCF_ROLE_DISCOVERY 0x0009 struct hci_cp_role_discovery { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); struct hci_rp_role_discovery { __u8 status; - __u16 handle; + __le16 handle; __u8 role; } __attribute__ ((packed)); #define OCF_READ_LINK_POLICY 0x000C struct hci_cp_read_link_policy { - __u16 handle; + __le16 handle; } __attribute__ ((packed)); struct hci_rp_read_link_policy { __u8 status; - __u16 handle; - __u16 policy; + __le16 handle; + __le16 policy; } __attribute__ ((packed)); #define OCF_SWITCH_ROLE 0x000B @@ -414,12 +414,12 @@ struct hci_cp_switch_role { #define OCF_WRITE_LINK_POLICY 0x000D struct hci_cp_write_link_policy { - __u16 handle; - __u16 policy; + __le16 handle; + __le16 policy; } __attribute__ ((packed)); struct hci_rp_write_link_policy { __u8 status; - __u16 handle; + __le16 handle; } __attribute__ ((packed)); /* Status params */ @@ -441,7 +441,7 @@ struct inquiry_info { __u8 pscan_period_mode; __u8 pscan_mode; __u8 dev_class[3]; - __u16 clock_offset; + __le16 clock_offset; } __attribute__ ((packed)); #define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22 @@ -450,7 +450,7 @@ struct inquiry_info_with_rssi { __u8 pscan_rep_mode; __u8 pscan_period_mode; __u8 dev_class[3]; - __u16 clock_offset; + __le16 clock_offset; __s8 rssi; } __attribute__ ((packed)); struct inquiry_info_with_rssi_and_pscan_mode { @@ -459,7 +459,7 @@ struct inquiry_info_with_rssi_and_pscan_mode { __u8 pscan_period_mode; __u8 pscan_mode; __u8 dev_class[3]; - __u16 clock_offset; + __le16 clock_offset; __s8 rssi; } __attribute__ ((packed)); @@ -469,7 +469,7 @@ struct extended_inquiry_info { __u8 pscan_rep_mode; __u8 pscan_period_mode; __u8 dev_class[3]; - __u16 clock_offset; + __le16 clock_offset; __s8 rssi; __u8 data[240]; } __attribute__ ((packed)); @@ -477,7 +477,7 @@ struct extended_inquiry_info { #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { __u8 status; - __u16 handle; + __le16 handle; bdaddr_t bdaddr; __u8 link_type; __u8 encr_mode; @@ -493,27 +493,27 @@ struct hci_ev_conn_request { #define HCI_EV_DISCONN_COMPLETE 0x05 struct hci_ev_disconn_complete { __u8 status; - __u16 handle; + __le16 handle; __u8 reason; } __attribute__ ((packed)); #define HCI_EV_AUTH_COMPLETE 0x06 struct hci_ev_auth_complete { __u8 status; - __u16 handle; + __le16 handle; } __attribute__ ((packed)); #define HCI_EV_ENCRYPT_CHANGE 0x08 struct hci_ev_encrypt_change { __u8 status; - __u16 handle; + __le16 handle; __u8 encrypt; } __attribute__ ((packed)); #define HCI_EV_CHANGE_CONN_LINK_KEY_COMPLETE 0x09 struct hci_ev_change_conn_link_key_complete { __u8 status; - __u16 handle; + __le16 handle; } __attribute__ ((packed)); #define HCI_EV_QOS_SETUP_COMPLETE 0x0D @@ -526,21 +526,21 @@ struct hci_qos { } __attribute__ ((packed)); struct hci_ev_qos_setup_complete { __u8 status; - __u16 handle; + __le16 handle; struct hci_qos qos; } __attribute__ ((packed)); #define HCI_EV_CMD_COMPLETE 0x0E struct hci_ev_cmd_complete { __u8 ncmd; - __u16 opcode; + __le16 opcode; } __attribute__ ((packed)); #define HCI_EV_CMD_STATUS 0x0F struct hci_ev_cmd_status { __u8 status; __u8 ncmd; - __u16 opcode; + __le16 opcode; } __attribute__ ((packed)); #define HCI_EV_NUM_COMP_PKTS 0x13 @@ -559,9 +559,9 @@ struct hci_ev_role_change { #define HCI_EV_MODE_CHANGE 0x14 struct hci_ev_mode_change { __u8 status; - __u16 handle; + __le16 handle; __u8 mode; - __u16 interval; + __le16 interval; } __attribute__ ((packed)); #define HCI_EV_PIN_CODE_REQ 0x16 @@ -584,24 +584,24 @@ struct hci_ev_link_key_notify { #define HCI_EV_RMT_FEATURES 0x0B struct hci_ev_rmt_features { __u8 status; - __u16 handle; + __le16 handle; __u8 features[8]; } __attribute__ ((packed)); #define HCI_EV_RMT_VERSION 0x0C struct hci_ev_rmt_version { __u8 status; - __u16 handle; + __le16 handle; __u8 lmp_ver; - __u16 manufacturer; - __u16 lmp_subver; + __le16 manufacturer; + __le16 lmp_subver; } __attribute__ ((packed)); #define HCI_EV_CLOCK_OFFSET 0x01C struct hci_ev_clock_offset { __u8 status; - __u16 handle; - __u16 clock_offset; + __le16 handle; + __le16 clock_offset; } __attribute__ ((packed)); #define HCI_EV_PSCAN_REP_MODE 0x20 @@ -638,7 +638,7 @@ struct hci_ev_si_security { #define HCI_SCO_HDR_SIZE 3 struct hci_command_hdr { - __u16 opcode; /* OCF & OGF */ + __le16 opcode; /* OCF & OGF */ __u8 plen; } __attribute__ ((packed)); @@ -648,22 +648,22 @@ struct hci_event_hdr { } __attribute__ ((packed)); struct hci_acl_hdr { - __u16 handle; /* Handle & Flags(PB, BC) */ - __u16 dlen; + __le16 handle; /* Handle & Flags(PB, BC) */ + __le16 dlen; } __attribute__ ((packed)); struct hci_sco_hdr { - __u16 handle; + __le16 handle; __u8 dlen; } __attribute__ ((packed)); /* Command opcode pack/unpack */ -#define hci_opcode_pack(ogf, ocf) (__u16)((ocf & 0x03ff)|(ogf << 10)) +#define hci_opcode_pack(ogf, ocf) (__u16) ((ocf & 0x03ff)|(ogf << 10)) #define hci_opcode_ogf(op) (op >> 10) #define hci_opcode_ocf(op) (op & 0x03ff) /* ACL handle and flags pack/unpack */ -#define hci_handle_pack(h, f) (__u16)((h & 0x0fff)|(f << 12)) +#define hci_handle_pack(h, f) (__u16) ((h & 0x0fff)|(f << 12)) #define hci_handle(h) (h & 0x0fff) #define hci_flags(h) (h >> 12) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7f933f30207..bb9f81dc872 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -25,7 +25,6 @@ #ifndef __HCI_CORE_H #define __HCI_CORE_H -#include <linux/proc_fs.h> #include <net/bluetooth/hci.h> /* HCI upper protocols */ @@ -34,8 +33,6 @@ #define HCI_INIT_TIMEOUT (HZ * 10) -extern struct proc_dir_entry *proc_bt_hci; - /* HCI Core structures */ struct inquiry_data { @@ -44,7 +41,7 @@ struct inquiry_data { __u8 pscan_period_mode; __u8 pscan_mode; __u8 dev_class[3]; - __u16 clock_offset; + __le16 clock_offset; __s8 rssi; }; @@ -126,10 +123,6 @@ struct hci_dev { atomic_t promisc; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc; -#endif - struct class_device class_dev; struct module *owner; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index e656be7c001..bbfac86734e 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -351,6 +351,4 @@ int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int rfcomm_init_ttys(void); void rfcomm_cleanup_ttys(void); -extern struct proc_dir_entry *proc_bt_rfcomm; - #endif /* __RFCOMM_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h new file mode 100644 index 00000000000..52d8b1a73d5 --- /dev/null +++ b/include/net/genetlink.h @@ -0,0 +1,154 @@ +#ifndef __NET_GENERIC_NETLINK_H +#define __NET_GENERIC_NETLINK_H + +#include <linux/genetlink.h> +#include <net/netlink.h> + +/** + * struct genl_family - generic netlink family + * @id: protocol family idenfitier + * @hdrsize: length of user specific header in bytes + * @name: name of family + * @version: protocol version + * @maxattr: maximum number of attributes supported + * @attrbuf: buffer to store parsed attributes + * @ops_list: list of all assigned operations + * @family_list: family list + */ +struct genl_family +{ + unsigned int id; + unsigned int hdrsize; + char name[GENL_NAMSIZ]; + unsigned int version; + unsigned int maxattr; + struct module * owner; + struct nlattr ** attrbuf; /* private */ + struct list_head ops_list; /* private */ + struct list_head family_list; /* private */ +}; + +#define GENL_ADMIN_PERM 0x01 + +/** + * struct genl_info - receiving information + * @snd_seq: sending sequence number + * @snd_pid: netlink pid of sender + * @nlhdr: netlink message header + * @genlhdr: generic netlink message header + * @userhdr: user specific header + * @attrs: netlink attributes + */ +struct genl_info +{ + u32 snd_seq; + u32 snd_pid; + struct nlmsghdr * nlhdr; + struct genlmsghdr * genlhdr; + void * userhdr; + struct nlattr ** attrs; +}; + +/** + * struct genl_ops - generic netlink operations + * @cmd: command identifier + * @flags: flags + * @policy: attribute validation policy + * @doit: standard command callback + * @dumpit: callback for dumpers + * @ops_list: operations list + */ +struct genl_ops +{ + unsigned int cmd; + unsigned int flags; + struct nla_policy *policy; + int (*doit)(struct sk_buff *skb, + struct genl_info *info); + int (*dumpit)(struct sk_buff *skb, + struct netlink_callback *cb); + struct list_head ops_list; +}; + +extern int genl_register_family(struct genl_family *family); +extern int genl_unregister_family(struct genl_family *family); +extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); +extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); + +extern struct sock *genl_sock; + +/** + * genlmsg_put - Add generic netlink header to netlink message + * @skb: socket buffer holding the message + * @pid: netlink pid the message is addressed to + * @seq: sequence number (usually the one of the sender) + * @type: netlink message type + * @hdrlen: length of the user specific header + * @flags netlink message flags + * @cmd: generic netlink command + * @version: version + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + int type, int hdrlen, int flags, + u8 cmd, u8 version) +{ + struct nlmsghdr *nlh; + struct genlmsghdr *hdr; + + nlh = nlmsg_put(skb, pid, seq, type, GENL_HDRLEN + hdrlen, flags); + if (nlh == NULL) + return NULL; + + hdr = nlmsg_data(nlh); + hdr->cmd = cmd; + hdr->version = version; + hdr->reserved = 0; + + return (char *) hdr + GENL_HDRLEN; +} + +/** + * genlmsg_end - Finalize a generic netlink message + * @skb: socket buffer the message is stored in + * @hdr: user specific header + */ +static inline int genlmsg_end(struct sk_buff *skb, void *hdr) +{ + return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); +} + +/** + * genlmsg_cancel - Cancel construction of a generic netlink message + * @skb: socket buffer the message is stored in + * @hdr: generic netlink message header + */ +static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) +{ + return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); +} + +/** + * genlmsg_multicast - multicast a netlink message + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id + */ +static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, + unsigned int group) +{ + return nlmsg_multicast(genl_sock, skb, pid, group); +} + +/** + * genlmsg_unicast - unicast a netlink message + * @skb: netlink message as socket buffer + * @pid: netlink pid of the destination socket + */ +static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid) +{ + return nlmsg_unicast(genl_sock, skb, pid); +} + +#endif /* __NET_GENERIC_NETLINK_H */ diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 5e38dca1d08..b93fd8c1d88 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -29,7 +29,7 @@ #include <linux/kernel.h> /* ARRAY_SIZE */ #include <linux/wireless.h> -#define IEEE80211_VERSION "git-1.1.6" +#define IEEE80211_VERSION "git-1.1.7" #define IEEE80211_DATA_LEN 2304 /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h index 0a1c2d82ca4..225fc751d46 100644 --- a/include/net/ieee80211_crypt.h +++ b/include/net/ieee80211_crypt.h @@ -31,6 +31,7 @@ enum { struct ieee80211_crypto_ops { const char *name; + struct list_head list; /* init new crypto context (e.g., allocate private data space, * select IV, etc.); returns NULL on failure or pointer to allocated diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index f87845e2e96..b0c47e2eccf 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -2,6 +2,7 @@ #define _INET_ECN_H_ #include <linux/ip.h> +#include <linux/skbuff.h> #include <net/dsfield.h> enum { @@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) (label) |= __constant_htons(INET_ECN_ECT_0 << 4); \ } while (0) -static inline void IP_ECN_set_ce(struct iphdr *iph) +static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 check = iph->check; u32 ecn = (iph->tos + 1) & INET_ECN_MASK; @@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph) * INET_ECN_CE => 00 */ if (!(ecn & 2)) - return; + return !ecn; /* * The following gives us: @@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph) iph->check = check + (check>=0xFFFF); iph->tos |= INET_ECN_CE; + return 1; } static inline void IP_ECN_clear(struct iphdr *iph) @@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner) struct ipv6hdr; -static inline void IP6_ECN_set_ce(struct ipv6hdr *iph) +static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) { if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) - return; + return 0; *(u32*)iph |= htonl(INET_ECN_CE << 20); + return 1; } static inline void IP6_ECN_clear(struct ipv6hdr *iph) @@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner) ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } +static inline int INET_ECN_set_ce(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail) + return IP_ECN_set_ce(skb->nh.iph); + break; + + case __constant_htons(ETH_P_IPV6): + if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail) + return IP6_ECN_set_ce(skb->nh.ipv6h); + break; + } + + return 0; +} + #endif diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f50f9596834..07840baa934 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -125,9 +125,7 @@ struct inet_hashinfo { rwlock_t lhash_lock ____cacheline_aligned; atomic_t lhash_users; wait_queue_head_t lhash_wait; - spinlock_t portalloc_lock; kmem_cache_t *bind_bucket_cachep; - int port_rover; }; static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 65ec86678a0..6addb4d464d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -252,12 +252,25 @@ typedef int (*inet_getfrag_t) (const void *data, char *, unsigned int, unsigned int); - -extern int ipv6_addr_type(const struct in6_addr *addr); +extern int __ipv6_addr_type(const struct in6_addr *addr); +static inline int ipv6_addr_type(const struct in6_addr *addr) +{ + return __ipv6_addr_type(addr) & 0xffff; +} static inline int ipv6_addr_scope(const struct in6_addr *addr) { - return ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; + return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; +} + +static inline int __ipv6_addr_src_scope(int type) +{ + return (type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16)); +} + +static inline int ipv6_addr_src_scope(const struct in6_addr *addr) +{ + return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) @@ -341,6 +354,54 @@ static inline int ipv6_addr_any(const struct in6_addr *a) } /* + * find the first different bit between two addresses + * length of address must be a multiple of 32bits + */ +static inline int __ipv6_addr_diff(const void *token1, const void *token2, int addrlen) +{ + const __u32 *a1 = token1, *a2 = token2; + int i; + + addrlen >>= 2; + + for (i = 0; i < addrlen; i++) { + __u32 xb = a1[i] ^ a2[i]; + if (xb) { + int j = 31; + + xb = ntohl(xb); + while ((xb & (1 << j)) == 0) + j--; + + return (i * 32 + 31 - j); + } + } + + /* + * we should *never* get to this point since that + * would mean the addrs are equal + * + * However, we do get to it 8) And exacly, when + * addresses are equal 8) + * + * ip route add 1111::/128 via ... + * ip route add 1111::/64 via ... + * and we are here. + * + * Ideally, this function should stop comparison + * at prefix length. It does not, but it is still OK, + * if returned value is greater than prefix length. + * --ANK (980803) + */ + return (addrlen << 5); +} + +static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); +} + +/* * Prototypes exported by ipv6 */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_icmp.h b/include/net/netfilter/ipv4/nf_conntrack_icmp.h new file mode 100644 index 00000000000..3dd22cff23e --- /dev/null +++ b/include/net/netfilter/ipv4/nf_conntrack_icmp.h @@ -0,0 +1,11 @@ +#ifndef _NF_CONNTRACK_ICMP_H +#define _NF_CONNTRACK_ICMP_H +/* ICMP tracking. */ +#include <asm/atomic.h> + +struct ip_ct_icmp +{ + /* Optimization: when number in == number out, forget immediately. */ + atomic_t count; +}; +#endif /* _NF_CONNTRACK_ICMP_H */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h new file mode 100644 index 00000000000..25b081a730e --- /dev/null +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -0,0 +1,43 @@ +/* + * IPv4 support for nf_conntrack. + * + * 23 Mar 2004: Yasuyuki Kozakai @ USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - move L3 protocol dependent part from include/linux/netfilter_ipv4/ + * ip_conntarck.h + */ + +#ifndef _NF_CONNTRACK_IPV4_H +#define _NF_CONNTRACK_IPV4_H + +#ifdef CONFIG_IP_NF_NAT_NEEDED +#include <linux/netfilter_ipv4/ip_nat.h> + +/* per conntrack: nat application helper private data */ +union ip_conntrack_nat_help { + /* insert nat helper private data here */ +}; + +struct nf_conntrack_ipv4_nat { + struct ip_nat_info info; + union ip_conntrack_nat_help help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; +#endif +}; +#endif /* CONFIG_IP_NF_NAT_NEEDED */ + +struct nf_conntrack_ipv4 { +#ifdef CONFIG_IP_NF_NAT_NEEDED + struct nf_conntrack_ipv4_nat *nat; +#endif +}; + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); + +/* call to create an explicit dependency on nf_conntrack_l3proto_ipv4. */ +extern void need_ip_conntrack(void); + +#endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h new file mode 100644 index 00000000000..86591afda29 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h @@ -0,0 +1,27 @@ +/* + * ICMPv6 tracking. + * + * 21 Apl 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - separated from nf_conntrack_icmp.h + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_icmp.h + */ + +#ifndef _NF_CONNTRACK_ICMPV6_H +#define _NF_CONNTRACK_ICMPV6_H +#include <asm/atomic.h> + +#ifndef ICMPV6_NI_QUERY +#define ICMPV6_NI_QUERY 139 +#endif +#ifndef ICMPV6_NI_REPLY +#define ICMPV6_NI_REPLY 140 +#endif + +struct nf_ct_icmpv6 +{ + /* Optimization: when number in == number out, forget immediately. */ + atomic_t count; +}; + +#endif /* _NF_CONNTRACK_ICMPV6_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h new file mode 100644 index 00000000000..cc482561079 --- /dev/null +++ b/include/net/netfilter/nf_conntrack.h @@ -0,0 +1,354 @@ +/* + * Connection state tracking for netfilter. This is separated from, + * but required by, the (future) NAT layer; it can also be used by an iptables + * extension. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack.h + */ + +#ifndef _NF_CONNTRACK_H +#define _NF_CONNTRACK_H + +#include <linux/netfilter/nf_conntrack_common.h> + +#ifdef __KERNEL__ +#include <linux/config.h> +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <asm/atomic.h> + +#include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/netfilter/nf_conntrack_sctp.h> +#include <net/netfilter/ipv4/nf_conntrack_icmp.h> +#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> + +#include <net/netfilter/nf_conntrack_tuple.h> + +/* per conntrack: protocol private data */ +union nf_conntrack_proto { + /* insert conntrack proto private data here */ + struct ip_ct_sctp sctp; + struct ip_ct_tcp tcp; + struct ip_ct_icmp icmp; + struct nf_ct_icmpv6 icmpv6; +}; + +union nf_conntrack_expect_proto { + /* insert expect proto private data here */ +}; + +/* Add protocol helper include file here */ +#include <linux/netfilter/nf_conntrack_ftp.h> + +/* per conntrack: application helper private data */ +union nf_conntrack_help { + /* insert conntrack helper private data (master) here */ + struct ip_ct_ftp_master ct_ftp_info; +}; + +#include <linux/types.h> +#include <linux/skbuff.h> + +#ifdef CONFIG_NETFILTER_DEBUG +#define NF_CT_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("NF_CT_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0) +#else +#define NF_CT_ASSERT(x) +#endif + +struct nf_conntrack_helper; + +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +struct nf_conn +{ + /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + plus 1 for any connection(s) we are `master' for */ + struct nf_conntrack ct_general; + + /* XXX should I move this to the tail ? - Y.K */ + /* These are my tuples; original and reply */ + struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned long status; + + /* Timer function; drops refcnt when it goes off. */ + struct timer_list timeout; + +#ifdef CONFIG_NF_CT_ACCT + /* Accounting Information (same cache line as other written members) */ + struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; +#endif + /* If we were expected by an expectation, this will be it */ + struct nf_conn *master; + + /* Current number of expected connections */ + unsigned int expecting; + + /* Helper. if any */ + struct nf_conntrack_helper *helper; + + /* features - nat, helper, ... used by allocating system */ + u_int32_t features; + + /* Storage reserved for other modules: */ + + union nf_conntrack_proto proto; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + u_int32_t mark; +#endif + + /* These members are dynamically allocated. */ + + union nf_conntrack_help *help; + + /* Layer 3 dependent members. (ex: NAT) */ + union { + struct nf_conntrack_ipv4 *ipv4; + } l3proto; + void *data[0]; +}; + +struct nf_conntrack_expect +{ + /* Internal linked list (global expectation list) */ + struct list_head list; + + /* We expect this tuple, with the following mask */ + struct nf_conntrack_tuple tuple, mask; + + /* Function to call after setup and insertion */ + void (*expectfn)(struct nf_conn *new, + struct nf_conntrack_expect *this); + + /* The conntrack of the master connection */ + struct nf_conn *master; + + /* Timer function; deletes the expectation. */ + struct timer_list timeout; + + /* Usage count. */ + atomic_t use; + + /* Flags */ + unsigned int flags; + +#ifdef CONFIG_NF_NAT_NEEDED + /* This is the original per-proto part, used to map the + * expected connection the way the recipient expects. */ + union nf_conntrack_manip_proto saved_proto; + /* Direction relative to the master connection. */ + enum ip_conntrack_dir dir; +#endif +}; + +#define NF_CT_EXPECT_PERMANENT 0x1 + +static inline struct nf_conn * +nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) +{ + return container_of(hash, struct nf_conn, + tuplehash[hash->tuple.dst.dir]); +} + +/* get master conntrack via master expectation */ +#define master_ct(conntr) (conntr->master) + +/* Alter reply tuple (maybe alter helper). */ +extern void +nf_conntrack_alter_reply(struct nf_conn *conntrack, + const struct nf_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). */ +extern int +nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +static inline struct nf_conn * +nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + *ctinfo = skb->nfctinfo; + return (struct nf_conn *)skb->nfct; +} + +/* decrement reference count on a conntrack */ +static inline void nf_ct_put(struct nf_conn *ct) +{ + NF_CT_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} + +/* call to create an explicit dependency on nf_conntrack. */ +extern void need_nf_conntrack(void); + +extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + +extern void __nf_ct_refresh_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void nf_ct_refresh_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); +} + +/* Refresh conntrack for this many jiffies */ +static inline void nf_ct_refresh(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); +} + +/* These are for NAT. Icky. */ +/* Update TCP window tracking data when NAT mangles the packet */ +extern void nf_conntrack_tcp_update(struct sk_buff *skb, + unsigned int dataoff, + struct nf_conn *conntrack, + int dir); + +/* Call me when a conntrack is destroyed. */ +extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); + +/* Fake conntrack entry for untracked connections */ +extern struct nf_conn nf_conntrack_untracked; + +extern int nf_ct_no_defrag; + +/* Iterate over all conntracks: if iter returns true, it's deleted. */ +extern void +nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); +extern void nf_conntrack_free(struct nf_conn *ct); +extern struct nf_conn * +nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl); + +/* It's confirmed if it is, or has been in the hash table. */ +static inline int nf_ct_is_confirmed(struct nf_conn *ct) +{ + return test_bit(IPS_CONFIRMED_BIT, &ct->status); +} + +static inline int nf_ct_is_dying(struct nf_conn *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + +extern unsigned int nf_conntrack_htable_size; + +#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +#include <linux/notifier.h> +#include <linux/interrupt.h> + +struct nf_conntrack_ecache { + struct nf_conn *ct; + unsigned int events; +}; +DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); + +#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x) + +extern struct notifier_block *nf_conntrack_chain; +extern struct notifier_block *nf_conntrack_expect_chain; + +static inline int nf_conntrack_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&nf_conntrack_chain, nb); +} + +static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&nf_conntrack_chain, nb); +} + +static inline int +nf_conntrack_expect_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&nf_conntrack_expect_chain, nb); +} + +static inline int +nf_conntrack_expect_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&nf_conntrack_expect_chain, nb); +} + +extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); +extern void __nf_ct_event_cache_init(struct nf_conn *ct); + +static inline void +nf_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) +{ + struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(nf_conntrack_ecache); + if (ct != ecache->ct) + __nf_ct_event_cache_init(ct); + ecache->events |= event; + local_bh_enable(); +} + +static inline void nf_conntrack_event(enum ip_conntrack_events event, + struct nf_conn *ct) +{ + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) + notifier_call_chain(&nf_conntrack_chain, event, ct); +} + +static inline void +nf_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) +{ + notifier_call_chain(&nf_conntrack_expect_chain, event, exp); +} +#else /* CONFIG_NF_CONNTRACK_EVENTS */ +static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) {} +static inline void nf_conntrack_event(enum ip_conntrack_events event, + struct nf_conn *ct) {} +static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} +static inline void +nf_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) {} +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ + +/* no helper, no nat */ +#define NF_CT_F_BASIC 0 +/* for helper */ +#define NF_CT_F_HELP 1 +/* for nat. */ +#define NF_CT_F_NAT 2 +#define NF_CT_F_NUM 4 + +extern int +nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size, + int (*init_conntrack)(struct nf_conn *, u_int32_t)); +extern void +nf_conntrack_unregister_cache(u_int32_t features); + +#endif /* __KERNEL__ */ +#endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_compat.h b/include/net/netfilter/nf_conntrack_compat.h new file mode 100644 index 00000000000..3cac19fb364 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_compat.h @@ -0,0 +1,108 @@ +#ifndef _NF_CONNTRACK_COMPAT_H +#define _NF_CONNTRACK_COMPAT_H + +#ifdef __KERNEL__ + +#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) + +#include <linux/netfilter_ipv4/ip_conntrack.h> + +#ifdef CONFIG_IP_NF_CONNTRACK_MARK +static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb, + u_int32_t *ctinfo) +{ + struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo); + + if (ct) + return &ct->mark; + else + return NULL; +} +#endif /* CONFIG_IP_NF_CONNTRACK_MARK */ + +#ifdef CONFIG_IP_NF_CT_ACCT +static inline struct ip_conntrack_counter * +nf_ct_get_counters(const struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo); + + if (ct) + return ct->counters; + else + return NULL; +} +#endif /* CONFIG_IP_NF_CT_ACCT */ + +static inline int nf_ct_is_untracked(const struct sk_buff *skb) +{ + return (skb->nfct == &ip_conntrack_untracked.ct_general); +} + +static inline void nf_ct_untrack(struct sk_buff *skb) +{ + skb->nfct = &ip_conntrack_untracked.ct_general; +} + +static inline int nf_ct_get_ctinfo(const struct sk_buff *skb, + enum ip_conntrack_info *ctinfo) +{ + struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo); + return (ct != NULL); +} + +#else /* CONFIG_IP_NF_CONNTRACK */ + +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +#include <net/netfilter/nf_conntrack.h> + +#ifdef CONFIG_NF_CONNTRACK_MARK + +static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb, + u_int32_t *ctinfo) +{ + struct nf_conn *ct = nf_ct_get(skb, ctinfo); + + if (ct) + return &ct->mark; + else + return NULL; +} +#endif /* CONFIG_NF_CONNTRACK_MARK */ + +#ifdef CONFIG_NF_CT_ACCT +static inline struct ip_conntrack_counter * +nf_ct_get_counters(const struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (ct) + return ct->counters; + else + return NULL; +} +#endif /* CONFIG_NF_CT_ACCT */ + +static inline int nf_ct_is_untracked(const struct sk_buff *skb) +{ + return (skb->nfct == &nf_conntrack_untracked.ct_general); +} + +static inline void nf_ct_untrack(struct sk_buff *skb) +{ + skb->nfct = &nf_conntrack_untracked.ct_general; +} + +static inline int nf_ct_get_ctinfo(const struct sk_buff *skb, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conn *ct = nf_ct_get(skb, ctinfo); + return (ct != NULL); +} + +#endif /* CONFIG_IP_NF_CONNTRACK */ + +#endif /* __KERNEL__ */ + +#endif /* _NF_CONNTRACK_COMPAT_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h new file mode 100644 index 00000000000..da254525a4c --- /dev/null +++ b/include/net/netfilter/nf_conntrack_core.h @@ -0,0 +1,76 @@ +/* + * This header is used to share core functionality between the + * standalone connection tracking module, and the compatibility layer's use + * of connection tracking. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h + */ + +#ifndef _NF_CONNTRACK_CORE_H +#define _NF_CONNTRACK_CORE_H + +#include <linux/netfilter.h> + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ +extern unsigned int nf_conntrack_in(int pf, + unsigned int hooknum, + struct sk_buff **pskb); + +extern int nf_conntrack_init(void); +extern void nf_conntrack_cleanup(void); + +struct nf_conntrack_l3proto; +extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf); +/* Like above, but you already have conntrack read lock. */ +extern struct nf_conntrack_l3proto *__nf_ct_find_l3proto(u_int16_t l3proto); + +struct nf_conntrack_protocol; + +extern int +nf_ct_get_tuple(const struct sk_buff *skb, + unsigned int nhoff, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol); + +extern int +nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol); + +/* Find a connection corresponding to a tuple. */ +extern struct nf_conntrack_tuple_hash * +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +extern int __nf_conntrack_confirm(struct sk_buff **pskb); + +/* Confirm a connection: returns NF_DROP if packet must be dropped. */ +static inline int nf_conntrack_confirm(struct sk_buff **pskb) +{ + struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct; + int ret = NF_ACCEPT; + + if (ct) { + if (!nf_ct_is_confirmed(ct)) + ret = __nf_conntrack_confirm(pskb); + nf_ct_deliver_cached_events(ct); + } + return ret; +} + +extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb); + +extern struct list_head *nf_conntrack_hash; +extern struct list_head nf_conntrack_expect_list; +extern rwlock_t nf_conntrack_lock ; +#endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h new file mode 100644 index 00000000000..5a66b2a3a62 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -0,0 +1,51 @@ +/* + * connection tracking helpers. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_helper.h + */ + +#ifndef _NF_CONNTRACK_HELPER_H +#define _NF_CONNTRACK_HELPER_H +#include <net/netfilter/nf_conntrack.h> + +struct module; + +struct nf_conntrack_helper +{ + struct list_head list; /* Internal use. */ + + const char *name; /* name of the module */ + struct module *me; /* pointer to self */ + unsigned int max_expected; /* Maximum number of concurrent + * expected connections */ + unsigned int timeout; /* timeout for expecteds */ + + /* Mask of things we will help (compared against server response) */ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple mask; + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info conntrackinfo); +}; + +extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); +extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); + +/* Allocate space for an expectation: this is mandatory before calling + nf_conntrack_expect_related. You will have to call put afterwards. */ +extern struct nf_conntrack_expect * +nf_conntrack_expect_alloc(struct nf_conn *master); +extern void nf_conntrack_expect_put(struct nf_conntrack_expect *exp); + +/* Add an expected connection: can have more than one per connection */ +extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp); +extern void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp); + +#endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h new file mode 100644 index 00000000000..01663e5b33d --- /dev/null +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -0,0 +1,93 @@ +/* + * Copyright (C)2003,2004 USAGI/WIDE Project + * + * Header for use in defining a given L3 protocol for connection tracking. + * + * Author: + * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * + * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h + */ + +#ifndef _NF_CONNTRACK_L3PROTO_H +#define _NF_CONNTRACK_L3PROTO_H +#include <linux/seq_file.h> +#include <net/netfilter/nf_conntrack.h> + +struct nf_conntrack_l3proto +{ + /* Next pointer. */ + struct list_head list; + + /* L3 Protocol Family number. ex) PF_INET */ + u_int16_t l3proto; + + /* Protocol name */ + const char *name; + + /* + * Try to fill in the third arg: nhoff is offset of l3 proto + * hdr. Return true if possible. + */ + int (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple); + + /* + * Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. + */ + int (*invert_tuple)(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. */ + int (*print_tuple)(struct seq_file *s, + const struct nf_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct nf_conn *conntrack, + const struct sk_buff *skb, + enum ip_conntrack_info ctinfo); + + /* + * Called when a new connection for this protocol found; + * returns TRUE if it's OK. If so, packet() called next. + */ + int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb); + + /* Called when a conntrack entry is destroyed */ + void (*destroy)(struct nf_conn *conntrack); + + /* + * Called before tracking. + * *dataoff: offset of protocol header (TCP, UDP,...) in *pskb + * *protonum: protocol number + */ + int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, + unsigned int *dataoff, u_int8_t *protonum); + + u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; + +/* Protocol registration. */ +extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); +extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); + +static inline struct nf_conntrack_l3proto * +nf_ct_find_l3proto(u_int16_t l3proto) +{ + return nf_ct_l3protos[l3proto]; +} + +/* Existing built-in protocols */ +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern struct nf_conntrack_l3proto nf_conntrack_generic_l3proto; +#endif /*_NF_CONNTRACK_L3PROTO_H*/ diff --git a/include/net/netfilter/nf_conntrack_protocol.h b/include/net/netfilter/nf_conntrack_protocol.h new file mode 100644 index 00000000000..b3afda35397 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_protocol.h @@ -0,0 +1,105 @@ +/* + * Header for use in defining a given protocol for connection tracking. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalized L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h + */ + +#ifndef _NF_CONNTRACK_PROTOCOL_H +#define _NF_CONNTRACK_PROTOCOL_H +#include <net/netfilter/nf_conntrack.h> + +struct seq_file; + +struct nf_conntrack_protocol +{ + /* Next pointer. */ + struct list_head list; + + /* L3 Protocol number. */ + u_int16_t l3proto; + + /* Protocol number. */ + u_int8_t proto; + + /* Protocol name */ + const char *name; + + /* Try to fill in the third arg: dataoff is offset past network protocol + hdr. Return true if possible. */ + int (*pkt_to_tuple)(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple); + + /* Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. + */ + int (*invert_tuple)(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. Return like seq_* */ + int (*print_tuple)(struct seq_file *s, + const struct nf_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum); + + /* Called when a new connection for this protocol found; + * returns TRUE if it's OK. If so, packet() called next. */ + int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff); + + /* Called when a conntrack entry is destroyed */ + void (*destroy)(struct nf_conn *conntrack); + + int (*error)(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, unsigned int hooknum); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +/* Existing built-in protocols */ +extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; +extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; + +#define MAX_NF_CT_PROTO 256 +extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; + +extern struct nf_conntrack_protocol * +nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol); + +/* Protocol registration. */ +extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto); +extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto); + +/* Log invalid packets */ +extern unsigned int nf_ct_log_invalid; + +#ifdef CONFIG_SYSCTL +#ifdef DEBUG_INVALID_PACKETS +#define LOG_INVALID(proto) \ + (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) +#else +#define LOG_INVALID(proto) \ + ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \ + && net_ratelimit()) +#endif +#else +#define LOG_INVALID(proto) 0 +#endif /* CONFIG_SYSCTL */ + +#endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h new file mode 100644 index 00000000000..14ce790e5c6 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -0,0 +1,190 @@ +/* + * Definitions and Declarations for tuple. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tuple.h + */ + +#ifndef _NF_CONNTRACK_TUPLE_H +#define _NF_CONNTRACK_TUPLE_H + +#include <linux/netfilter/nf_conntrack_tuple_common.h> + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +#define NF_CT_TUPLE_L3SIZE 4 + +/* The l3 protocol-specific manipulable parts of the tuple: always in + network order! */ +union nf_conntrack_man_l3proto { + u_int32_t all[NF_CT_TUPLE_L3SIZE]; + u_int32_t ip; + u_int32_t ip6[4]; +}; + +/* The protocol-specific manipulable parts of the tuple: always in + network order! */ +union nf_conntrack_man_proto +{ + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int16_t id; + } icmp; + struct { + u_int16_t port; + } sctp; +}; + +/* The manipulable part of the tuple. */ +struct nf_conntrack_man +{ + union nf_conntrack_man_l3proto u3; + union nf_conntrack_man_proto u; + /* Layer 3 protocol */ + u_int16_t l3num; +}; + +/* This contains the information to distinguish a connection. */ +struct nf_conntrack_tuple +{ + struct nf_conntrack_man src; + + /* These are the parts of the tuple which are fixed. */ + struct { + union { + u_int32_t all[NF_CT_TUPLE_L3SIZE]; + u_int32_t ip; + u_int32_t ip6[4]; + } u3; + union { + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int8_t type, code; + } icmp; + struct { + u_int16_t port; + } sctp; + } u; + + /* The protocol. */ + u_int8_t protonum; + + /* The direction (for tuplehash) */ + u_int8_t dir; + } dst; +}; + +/* This is optimized opposed to a memset of the whole structure. Everything we + * really care about is the source/destination unions */ +#define NF_CT_TUPLE_U_BLANK(tuple) \ + do { \ + (tuple)->src.u.all = 0; \ + (tuple)->dst.u.all = 0; \ + memset(&(tuple)->src.u3, 0, sizeof((tuple)->src.u3)); \ + memset(&(tuple)->dst.u3, 0, sizeof((tuple)->dst.u3)); \ + } while (0) + +#ifdef __KERNEL__ + +#define NF_CT_DUMP_TUPLE(tp) \ +DEBUGP("tuple %p: %u %u %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu -> %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu\n", \ + (tp), (tp)->src.l3num, (tp)->dst.protonum, \ + NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ + NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) + +/* If we're the first tuple, it's the original dir. */ +#define NF_CT_DIRECTION(h) \ + ((enum ip_conntrack_dir)(h)->tuple.dst.dir) + +/* Connections have two entries in the hash table: one for each way */ +struct nf_conntrack_tuple_hash +{ + struct list_head list; + + struct nf_conntrack_tuple tuple; +}; + +#endif /* __KERNEL__ */ + +static inline int nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (t1->src.u3.all[0] == t2->src.u3.all[0] && + t1->src.u3.all[1] == t2->src.u3.all[1] && + t1->src.u3.all[2] == t2->src.u3.all[2] && + t1->src.u3.all[3] == t2->src.u3.all[3] && + t1->src.u.all == t2->src.u.all && + t1->src.l3num == t2->src.l3num && + t1->dst.protonum == t2->dst.protonum); +} + +static inline int nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (t1->dst.u3.all[0] == t2->dst.u3.all[0] && + t1->dst.u3.all[1] == t2->dst.u3.all[1] && + t1->dst.u3.all[2] == t2->dst.u3.all[2] && + t1->dst.u3.all[3] == t2->dst.u3.all[3] && + t1->dst.u.all == t2->dst.u.all && + t1->src.l3num == t2->src.l3num && + t1->dst.protonum == t2->dst.protonum); +} + +static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2); +} + +static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *mask) +{ + int count = 0; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + if ((t->src.u3.all[count] ^ tuple->src.u3.all[count]) & + mask->src.u3.all[count]) + return 0; + } + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + if ((t->dst.u3.all[count] ^ tuple->dst.u3.all[count]) & + mask->dst.u3.all[count]) + return 0; + } + + if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all || + (t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all || + (t->src.l3num ^ tuple->src.l3num) & mask->src.l3num || + (t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum) + return 0; + + return 1; +} + +#endif /* _NF_CONNTRACK_TUPLE_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h new file mode 100644 index 00000000000..640c26a90cf --- /dev/null +++ b/include/net/netlink.h @@ -0,0 +1,883 @@ +#ifndef __NET_NETLINK_H +#define __NET_NETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +/* ======================================================================== + * Netlink Messages and Attributes Interface (As Seen On TV) + * ------------------------------------------------------------------------ + * Messages Interface + * ------------------------------------------------------------------------ + * + * Message Format: + * <--- nlmsg_total_size(payload) ---> + * <-- nlmsg_msg_size(payload) -> + * +----------+- - -+-------------+- - -+-------- - - + * | nlmsghdr | Pad | Payload | Pad | nlmsghdr + * +----------+- - -+-------------+- - -+-------- - - + * nlmsg_data(nlh)---^ ^ + * nlmsg_next(nlh)-----------------------+ + * + * Payload Format: + * <---------------------- nlmsg_len(nlh) ---------------------> + * <------ hdrlen ------> <- nlmsg_attrlen(nlh, hdrlen) -> + * +----------------------+- - -+--------------------------------+ + * | Family Header | Pad | Attributes | + * +----------------------+- - -+--------------------------------+ + * nlmsg_attrdata(nlh, hdrlen)---^ + * + * Data Structures: + * struct nlmsghdr netlink message header + * + * Message Construction: + * nlmsg_new() create a new netlink message + * nlmsg_put() add a netlink message to an skb + * nlmsg_put_answer() callback based nlmsg_put() + * nlmsg_end() finanlize netlink message + * nlmsg_cancel() cancel message construction + * nlmsg_free() free a netlink message + * + * Message Sending: + * nlmsg_multicast() multicast message to several groups + * nlmsg_unicast() unicast a message to a single socket + * + * Message Length Calculations: + * nlmsg_msg_size(payload) length of message w/o padding + * nlmsg_total_size(payload) length of message w/ padding + * nlmsg_padlen(payload) length of padding at tail + * + * Message Payload Access: + * nlmsg_data(nlh) head of message payload + * nlmsg_len(nlh) length of message payload + * nlmsg_attrdata(nlh, hdrlen) head of attributes data + * nlmsg_attrlen(nlh, hdrlen) length of attributes data + * + * Message Parsing: + * nlmsg_ok(nlh, remaining) does nlh fit into remaining bytes? + * nlmsg_next(nlh, remaining) get next netlink message + * nlmsg_parse() parse attributes of a message + * nlmsg_find_attr() find an attribute in a message + * nlmsg_for_each_msg() loop over all messages + * nlmsg_validate() validate netlink message incl. attrs + * nlmsg_for_each_attr() loop over all attributes + * + * ------------------------------------------------------------------------ + * Attributes Interface + * ------------------------------------------------------------------------ + * + * Attribute Format: + * <------- nla_total_size(payload) -------> + * <---- nla_attr_size(payload) -----> + * +----------+- - -+- - - - - - - - - +- - -+-------- - - + * | Header | Pad | Payload | Pad | Header + * +----------+- - -+- - - - - - - - - +- - -+-------- - - + * <- nla_len(nla) -> ^ + * nla_data(nla)----^ | + * nla_next(nla)-----------------------------' + * + * Data Structures: + * struct nlattr netlink attribtue header + * + * Attribute Construction: + * nla_reserve(skb, type, len) reserve skb tailroom for an attribute + * nla_put(skb, type, len, data) add attribute to skb + * + * Attribute Construction for Basic Types: + * nla_put_u8(skb, type, value) add u8 attribute to skb + * nla_put_u16(skb, type, value) add u16 attribute to skb + * nla_put_u32(skb, type, value) add u32 attribute to skb + * nla_put_u64(skb, type, value) add u64 attribute to skb + * nla_put_string(skb, type, str) add string attribute to skb + * nla_put_flag(skb, type) add flag attribute to skb + * nla_put_msecs(skb, type, jiffies) add msecs attribute to skb + * + * Exceptions Based Attribute Construction: + * NLA_PUT(skb, type, len, data) add attribute to skb + * NLA_PUT_U8(skb, type, value) add u8 attribute to skb + * NLA_PUT_U16(skb, type, value) add u16 attribute to skb + * NLA_PUT_U32(skb, type, value) add u32 attribute to skb + * NLA_PUT_U64(skb, type, value) add u64 attribute to skb + * NLA_PUT_STRING(skb, type, str) add string attribute to skb + * NLA_PUT_FLAG(skb, type) add flag attribute to skb + * NLA_PUT_MSECS(skb, type, jiffies) add msecs attribute to skb + * + * The meaning of these functions is equal to their lower case + * variants but they jump to the label nla_put_failure in case + * of a failure. + * + * Nested Attributes Construction: + * nla_nest_start(skb, type) start a nested attribute + * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_cancel(skb, nla) cancel nested attribute construction + * + * Attribute Length Calculations: + * nla_attr_size(payload) length of attribute w/o padding + * nla_total_size(payload) length of attribute w/ padding + * nla_padlen(payload) length of padding + * + * Attribute Payload Access: + * nla_data(nla) head of attribute payload + * nla_len(nla) length of attribute payload + * + * Attribute Payload Access for Basic Types: + * nla_get_u8(nla) get payload for a u8 attribute + * nla_get_u16(nla) get payload for a u16 attribute + * nla_get_u32(nla) get payload for a u32 attribute + * nla_get_u64(nla) get payload for a u64 attribute + * nla_get_flag(nla) return 1 if flag is true + * nla_get_msecs(nla) get payload for a msecs attribute + * + * Attribute Misc: + * nla_memcpy(dest, nla, count) copy attribute into memory + * nla_memcmp(nla, data, size) compare attribute with memory area + * nla_strlcpy(dst, nla, size) copy attribute to a sized string + * nla_strcmp(nla, str) compare attribute with string + * + * Attribute Parsing: + * nla_ok(nla, remaining) does nla fit into remaining bytes? + * nla_next(nla, remaining) get next netlink attribute + * nla_validate() validate a stream of attributes + * nla_find() find attribute in stream of attributes + * nla_parse() parse and validate stream of attrs + * nla_parse_nested() parse nested attribuets + * nla_for_each_attr() loop over all attributes + *========================================================================= + */ + + /** + * Standard attribute types to specify validation policy + */ +enum { + NLA_UNSPEC, + NLA_U8, + NLA_U16, + NLA_U32, + NLA_U64, + NLA_STRING, + NLA_FLAG, + NLA_MSECS, + NLA_NESTED, + __NLA_TYPE_MAX, +}; + +#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) + +/** + * struct nla_policy - attribute validation policy + * @type: Type of attribute or NLA_UNSPEC + * @minlen: Minimal length of payload required to be available + * + * Policies are defined as arrays of this struct, the array must be + * accessible by attribute type up to the highest identifier to be expected. + * + * Example: + * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { + * [ATTR_FOO] = { .type = NLA_U16 }, + * [ATTR_BAR] = { .type = NLA_STRING }, + * [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) }, + * }; + */ +struct nla_policy { + u16 type; + u16 minlen; +}; + +extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, + int (*cb)(struct sk_buff *, + struct nlmsghdr *, int *)); +extern void netlink_queue_skip(struct nlmsghdr *nlh, + struct sk_buff *skb); + +extern int nla_validate(struct nlattr *head, int len, int maxtype, + struct nla_policy *policy); +extern int nla_parse(struct nlattr *tb[], int maxtype, + struct nlattr *head, int len, + struct nla_policy *policy); +extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); +extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, + size_t dstsize); +extern int nla_memcpy(void *dest, struct nlattr *src, int count); +extern int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size); +extern int nla_strcmp(const struct nlattr *nla, const char *str); +extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, + int attrlen); +extern void __nla_put(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +extern int nla_put(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); + +/************************************************************************** + * Netlink Messages + **************************************************************************/ + +/** + * nlmsg_msg_size - length of netlink message not including padding + * @payload: length of message payload + */ +static inline int nlmsg_msg_size(int payload) +{ + return NLMSG_HDRLEN + payload; +} + +/** + * nlmsg_total_size - length of netlink message including padding + * @payload: length of message payload + */ +static inline int nlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(nlmsg_msg_size(payload)); +} + +/** + * nlmsg_padlen - length of padding at the message's tail + * @payload: length of message payload + */ +static inline int nlmsg_padlen(int payload) +{ + return nlmsg_total_size(payload) - nlmsg_msg_size(payload); +} + +/** + * nlmsg_data - head of message payload + * @nlh: netlink messsage header + */ +static inline void *nlmsg_data(const struct nlmsghdr *nlh) +{ + return (unsigned char *) nlh + NLMSG_HDRLEN; +} + +/** + * nlmsg_len - length of message payload + * @nlh: netlink message header + */ +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * nlmsg_attrdata - head of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, + int hdrlen) +{ + unsigned char *data = nlmsg_data(nlh); + return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); +} + +/** + * nlmsg_attrlen - length of attributes data + * @nlh: netlink message header + * @hdrlen: length of family specific header + */ +static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) +{ + return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen); +} + +/** + * nlmsg_ok - check if the netlink message fits into the remaining bytes + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + */ +static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) +{ + return (remaining >= sizeof(struct nlmsghdr) && + nlh->nlmsg_len >= sizeof(struct nlmsghdr) && + nlh->nlmsg_len <= remaining); +} + +/** + * nlmsg_next - next netlink message in message stream + * @nlh: netlink message header + * @remaining: number of bytes remaining in message stream + * + * Returns the next netlink message in the message stream and + * decrements remaining by the size of the current message. + */ +static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) +{ + int totlen = NLMSG_ALIGN(nlh->nlmsg_len); + + *remaining -= totlen; + + return (struct nlmsghdr *) ((unsigned char *) nlh + totlen); +} + +/** + * nlmsg_parse - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * See nla_parse() + */ +static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + struct nla_policy *policy) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy); +} + +/** + * nlmsg_find_attr - find a specific attribute in a netlink message + * @nlh: netlink message header + * @hdrlen: length of familiy specific header + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. + */ +static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh, + int hdrlen, int attrtype) +{ + return nla_find(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), attrtype); +} + +/** + * nlmsg_validate - validate a netlink message including attributes + * @nlh: netlinket message header + * @hdrlen: length of familiy specific header + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + */ +static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, + struct nla_policy *policy) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + return nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), maxtype, policy); +} + +/** + * nlmsg_for_each_attr - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @nlh: netlink message header + * @hdrlen: length of familiy specific header + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ + nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \ + nlmsg_attrlen(nlh, hdrlen), rem) + +#if 0 +/* FIXME: Enable once all users have been converted */ + +/** + * __nlmsg_put - Add a new netlink message to an skb + * @skb: socket buffer to store message in + * @pid: netlink process id + * @seq: sequence number of message + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for both the netlink header and payload. + */ +static inline struct nlmsghdr *__nlmsg_put(struct sk_buff *skb, u32 pid, + u32 seq, int type, int payload, + int flags) +{ + struct nlmsghdr *nlh; + + nlh = (struct nlmsghdr *) skb_put(skb, nlmsg_total_size(payload)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = nlmsg_msg_size(payload); + nlh->nlmsg_flags = flags; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + + memset((unsigned char *) nlmsg_data(nlh) + payload, 0, + nlmsg_padlen(payload)); + + return nlh; +} +#endif + +/** + * nlmsg_put - Add a new netlink message to an skb + * @skb: socket buffer to store message in + * @pid: netlink process id + * @seq: sequence number of message + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + int type, int payload, int flags) +{ + if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) + return NULL; + + return __nlmsg_put(skb, pid, seq, type, payload, flags); +} + +/** + * nlmsg_put_answer - Add a new callback based netlink message to an skb + * @skb: socket buffer to store message in + * @cb: netlink callback + * @type: message type + * @payload: length of message payload + * @flags: message flags + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the message header and payload. + */ +static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, + struct netlink_callback *cb, + int type, int payload, + int flags) +{ + return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + type, payload, flags); +} + +/** + * nlmsg_new - Allocate a new netlink message + * @size: maximum size of message + * + * Use NLMSG_GOODSIZE if size isn't know and you need a good default size. + */ +static inline struct sk_buff *nlmsg_new(int size) +{ + return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); +} + +/** + * nlmsg_end - Finalize a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Corrects the netlink message header to include the appeneded + * attributes. Only necessary if attributes have been added to + * the message. + * + * Returns the total data length of the skb. + */ +static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = skb->tail - (unsigned char *) nlh; + + return skb->len; +} + +/** + * nlmsg_cancel - Cancel construction of a netlink message + * @skb: socket buffer the message is stored in + * @nlh: netlink message header + * + * Removes the complete netlink message including all + * attributes from the socket buffer again. Returns -1. + */ +static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + skb_trim(skb, (unsigned char *) nlh - skb->data); + + return -1; +} + +/** + * nlmsg_free - free a netlink message + * @skb: socket buffer of netlink message + */ +static inline void nlmsg_free(struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/** + * nlmsg_multicast - multicast a netlink message + * @sk: netlink socket to spread messages to + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id + */ +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group) +{ + int err; + + NETLINK_CB(skb).dst_group = group; + + err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL); + if (err > 0) + err = 0; + + return err; +} + +/** + * nlmsg_unicast - unicast a netlink message + * @sk: netlink socket to spread message to + * @skb: netlink message as socket buffer + * @pid: netlink pid of the destination socket + */ +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +{ + int err; + + err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + if (err > 0) + err = 0; + + return err; +} + +/** + * nlmsg_for_each_msg - iterate over a stream of messages + * @pos: loop counter, set to current message + * @head: head of message stream + * @len: length of message stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nlmsg_for_each_msg(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nlmsg_ok(pos, rem); \ + pos = nlmsg_next(pos, &(rem))) + +/************************************************************************** + * Netlink Attributes + **************************************************************************/ + +/** + * nla_attr_size - length of attribute not including padding + * @payload: length of payload + */ +static inline int nla_attr_size(int payload) +{ + return NLA_HDRLEN + payload; +} + +/** + * nla_total_size - total length of attribute including padding + * @payload: length of payload + */ +static inline int nla_total_size(int payload) +{ + return NLA_ALIGN(nla_attr_size(payload)); +} + +/** + * nla_padlen - length of padding at the tail of attribute + * @payload: length of payload + */ +static inline int nla_padlen(int payload) +{ + return nla_total_size(payload) - nla_attr_size(payload); +} + +/** + * nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +/** + * nla_len - length of payload + * @nla: netlink attribute + */ +static inline int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +/** + * nla_ok - check if the netlink attribute fits into the remaining bytes + * @nla: netlink attribute + * @remaining: number of bytes remaining in attribute stream + */ +static inline int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +/** + * nla_next - next netlink attribte in attribute stream + * @nla: netlink attribute + * @remaining: number of bytes remaining in attribute stream + * + * Returns the next netlink attribute in the attribute stream and + * decrements remaining by the size of the current attribute. + */ +static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +/** + * nla_parse_nested - parse nested attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @policy: validation policy + * + * See nla_parse() + */ +static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + struct nla_policy *policy) +{ + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); +} +/** + * nla_put_u8 - Add a u16 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) +{ + return nla_put(skb, attrtype, sizeof(u8), &value); +} + +/** + * nla_put_u16 - Add a u16 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) +{ + return nla_put(skb, attrtype, sizeof(u16), &value); +} + +/** + * nla_put_u32 - Add a u32 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) +{ + return nla_put(skb, attrtype, sizeof(u32), &value); +} + +/** + * nla_put_64 - Add a u64 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value) +{ + return nla_put(skb, attrtype, sizeof(u64), &value); +} + +/** + * nla_put_string - Add a string netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @str: NUL terminated string + */ +static inline int nla_put_string(struct sk_buff *skb, int attrtype, + const char *str) +{ + return nla_put(skb, attrtype, strlen(str) + 1, str); +} + +/** + * nla_put_flag - Add a flag netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + */ +static inline int nla_put_flag(struct sk_buff *skb, int attrtype) +{ + return nla_put(skb, attrtype, 0, NULL); +} + +/** + * nla_put_msecs - Add a msecs netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @jiffies: number of msecs in jiffies + */ +static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, + unsigned long jiffies) +{ + u64 tmp = jiffies_to_msecs(jiffies); + return nla_put(skb, attrtype, sizeof(u64), &tmp); +} + +#define NLA_PUT(skb, attrtype, attrlen, data) \ + do { \ + if (nla_put(skb, attrtype, attrlen, data) < 0) \ + goto nla_put_failure; \ + } while(0) + +#define NLA_PUT_TYPE(skb, type, attrtype, value) \ + do { \ + type __tmp = value; \ + NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \ + } while(0) + +#define NLA_PUT_U8(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u8, attrtype, value) + +#define NLA_PUT_U16(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u16, attrtype, value) + +#define NLA_PUT_U32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u32, attrtype, value) + +#define NLA_PUT_U64(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, u64, attrtype, value) + +#define NLA_PUT_STRING(skb, attrtype, value) \ + NLA_PUT(skb, attrtype, strlen(value) + 1, value) + +#define NLA_PUT_FLAG(skb, attrtype, value) \ + NLA_PUT(skb, attrtype, 0, NULL) + +#define NLA_PUT_MSECS(skb, attrtype, jiffies) \ + NLA_PUT_U64(skb, attrtype, jiffies_to_msecs(jiffies)) + +/** + * nla_get_u32 - return payload of u32 attribute + * @nla: u32 netlink attribute + */ +static inline u32 nla_get_u32(struct nlattr *nla) +{ + return *(u32 *) nla_data(nla); +} + +/** + * nla_get_u16 - return payload of u16 attribute + * @nla: u16 netlink attribute + */ +static inline u16 nla_get_u16(struct nlattr *nla) +{ + return *(u16 *) nla_data(nla); +} + +/** + * nla_get_u8 - return payload of u8 attribute + * @nla: u8 netlink attribute + */ +static inline u8 nla_get_u8(struct nlattr *nla) +{ + return *(u8 *) nla_data(nla); +} + +/** + * nla_get_u64 - return payload of u64 attribute + * @nla: u64 netlink attribute + */ +static inline u64 nla_get_u64(struct nlattr *nla) +{ + u64 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + + return tmp; +} + +/** + * nla_get_flag - return payload of flag attribute + * @nla: flag netlink attribute + */ +static inline int nla_get_flag(struct nlattr *nla) +{ + return !!nla; +} + +/** + * nla_get_msecs - return payload of msecs attribute + * @nla: msecs netlink attribute + * + * Returns the number of milliseconds in jiffies. + */ +static inline unsigned long nla_get_msecs(struct nlattr *nla) +{ + u64 msecs = nla_get_u64(nla); + + return msecs_to_jiffies((unsigned long) msecs); +} + +/** + * nla_nest_start - Start a new level of nested attributes + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * + * Returns the container attribute + */ +static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +{ + struct nlattr *start = (struct nlattr *) skb->tail; + + if (nla_put(skb, attrtype, 0, NULL) < 0) + return NULL; + + return start; +} + +/** + * nla_nest_end - Finalize nesting of attributes + * @skb: socket buffer the attribtues are stored in + * @start: container attribute + * + * Corrects the container attribute header to include the all + * appeneded attributes. + * + * Returns the total data length of the skb. + */ +static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) +{ + start->nla_len = skb->tail - (unsigned char *) start; + return skb->len; +} + +/** + * nla_nest_cancel - Cancel nesting of attributes + * @skb: socket buffer the message is stored in + * @start: container attribute + * + * Removes the container attribute and including all nested + * attributes. Returns -1. + */ +static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) +{ + if (start) + skb_trim(skb, (unsigned char *) start - skb->data); + + return -1; +} + +/** + * nla_for_each_attr - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +#endif diff --git a/include/net/red.h b/include/net/red.h new file mode 100644 index 00000000000..2ed4358e329 --- /dev/null +++ b/include/net/red.h @@ -0,0 +1,325 @@ +#ifndef __NET_SCHED_RED_H +#define __NET_SCHED_RED_H + +#include <linux/config.h> +#include <linux/types.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> +#include <net/dsfield.h> + +/* Random Early Detection (RED) algorithm. + ======================================= + + Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways + for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. + + This file codes a "divisionless" version of RED algorithm + as written down in Fig.17 of the paper. + + Short description. + ------------------ + + When a new packet arrives we calculate the average queue length: + + avg = (1-W)*avg + W*current_queue_len, + + W is the filter time constant (chosen as 2^(-Wlog)), it controls + the inertia of the algorithm. To allow larger bursts, W should be + decreased. + + if (avg > th_max) -> packet marked (dropped). + if (avg < th_min) -> packet passes. + if (th_min < avg < th_max) we calculate probability: + + Pb = max_P * (avg - th_min)/(th_max-th_min) + + and mark (drop) packet with this probability. + Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). + max_P should be small (not 1), usually 0.01..0.02 is good value. + + max_P is chosen as a number, so that max_P/(th_max-th_min) + is a negative power of two in order arithmetics to contain + only shifts. + + + Parameters, settable by user: + ----------------------------- + + qth_min - bytes (should be < qth_max/2) + qth_max - bytes (should be at least 2*qth_min and less limit) + Wlog - bits (<32) log(1/W). + Plog - bits (<32) + + Plog is related to max_P by formula: + + max_P = (qth_max-qth_min)/2^Plog; + + F.e. if qth_max=128K and qth_min=32K, then Plog=22 + corresponds to max_P=0.02 + + Scell_log + Stab + + Lookup table for log((1-W)^(t/t_ave). + + + NOTES: + + Upper bound on W. + ----------------- + + If you want to allow bursts of L packets of size S, + you should choose W: + + L + 1 - th_min/S < (1-(1-W)^L)/W + + th_min/S = 32 th_min/S = 4 + + log(W) L + -1 33 + -2 35 + -3 39 + -4 46 + -5 57 + -6 75 + -7 101 + -8 135 + -9 190 + etc. + */ + +#define RED_STAB_SIZE 256 +#define RED_STAB_MASK (RED_STAB_SIZE - 1) + +struct red_stats +{ + u32 prob_drop; /* Early probability drops */ + u32 prob_mark; /* Early probability marks */ + u32 forced_drop; /* Forced drops, qavg > max_thresh */ + u32 forced_mark; /* Forced marks, qavg > max_thresh */ + u32 pdrop; /* Drops due to queue limits */ + u32 other; /* Drops due to drop() calls */ + u32 backlog; +}; + +struct red_parms +{ + /* Parameters */ + u32 qth_min; /* Min avg length threshold: A scaled */ + u32 qth_max; /* Max avg length threshold: A scaled */ + u32 Scell_max; + u32 Rmask; /* Cached random mask, see red_rmask */ + u8 Scell_log; + u8 Wlog; /* log(W) */ + u8 Plog; /* random number bits */ + u8 Stab[RED_STAB_SIZE]; + + /* Variables */ + int qcount; /* Number of packets since last random + number generation */ + u32 qR; /* Cached random number */ + + unsigned long qavg; /* Average queue length: A scaled */ + psched_time_t qidlestart; /* Start of current idle period */ +}; + +static inline u32 red_rmask(u8 Plog) +{ + return Plog < 32 ? ((1 << Plog) - 1) : ~0UL; +} + +static inline void red_set_parms(struct red_parms *p, + u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, + u8 Scell_log, u8 *stab) +{ + /* Reset average queue length, the value is strictly bound + * to the parameters below, reseting hurts a bit but leaving + * it might result in an unreasonable qavg for a while. --TGR + */ + p->qavg = 0; + + p->qcount = -1; + p->qth_min = qth_min << Wlog; + p->qth_max = qth_max << Wlog; + p->Wlog = Wlog; + p->Plog = Plog; + p->Rmask = red_rmask(Plog); + p->Scell_log = Scell_log; + p->Scell_max = (255 << Scell_log); + + memcpy(p->Stab, stab, sizeof(p->Stab)); +} + +static inline int red_is_idling(struct red_parms *p) +{ + return !PSCHED_IS_PASTPERFECT(p->qidlestart); +} + +static inline void red_start_of_idle_period(struct red_parms *p) +{ + PSCHED_GET_TIME(p->qidlestart); +} + +static inline void red_end_of_idle_period(struct red_parms *p) +{ + PSCHED_SET_PASTPERFECT(p->qidlestart); +} + +static inline void red_restart(struct red_parms *p) +{ + red_end_of_idle_period(p); + p->qavg = 0; + p->qcount = -1; +} + +static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p) +{ + psched_time_t now; + long us_idle; + int shift; + + PSCHED_GET_TIME(now); + us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max); + + /* + * The problem: ideally, average length queue recalcultion should + * be done over constant clock intervals. This is too expensive, so + * that the calculation is driven by outgoing packets. + * When the queue is idle we have to model this clock by hand. + * + * SF+VJ proposed to "generate": + * + * m = idletime / (average_pkt_size / bandwidth) + * + * dummy packets as a burst after idle time, i.e. + * + * p->qavg *= (1-W)^m + * + * This is an apparently overcomplicated solution (f.e. we have to + * precompute a table to make this calculation in reasonable time) + * I believe that a simpler model may be used here, + * but it is field for experiments. + */ + + shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK]; + + if (shift) + return p->qavg >> shift; + else { + /* Approximate initial part of exponent with linear function: + * + * (1-W)^m ~= 1-mW + ... + * + * Seems, it is the best solution to + * problem of too coarse exponent tabulation. + */ + us_idle = (p->qavg * us_idle) >> p->Scell_log; + + if (us_idle < (p->qavg >> 1)) + return p->qavg - us_idle; + else + return p->qavg >> 1; + } +} + +static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p, + unsigned int backlog) +{ + /* + * NOTE: p->qavg is fixed point number with point at Wlog. + * The formula below is equvalent to floating point + * version: + * + * qavg = qavg*(1-W) + backlog*W; + * + * --ANK (980924) + */ + return p->qavg + (backlog - (p->qavg >> p->Wlog)); +} + +static inline unsigned long red_calc_qavg(struct red_parms *p, + unsigned int backlog) +{ + if (!red_is_idling(p)) + return red_calc_qavg_no_idle_time(p, backlog); + else + return red_calc_qavg_from_idle_time(p); +} + +static inline u32 red_random(struct red_parms *p) +{ + return net_random() & p->Rmask; +} + +static inline int red_mark_probability(struct red_parms *p, unsigned long qavg) +{ + /* The formula used below causes questions. + + OK. qR is random number in the interval 0..Rmask + i.e. 0..(2^Plog). If we used floating point + arithmetics, it would be: (2^Plog)*rnd_num, + where rnd_num is less 1. + + Taking into account, that qavg have fixed + point at Wlog, and Plog is related to max_P by + max_P = (qth_max-qth_min)/2^Plog; two lines + below have the following floating point equivalent: + + max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount + + Any questions? --ANK (980924) + */ + return !(((qavg - p->qth_min) >> p->Wlog) * p->qcount < p->qR); +} + +enum { + RED_BELOW_MIN_THRESH, + RED_BETWEEN_TRESH, + RED_ABOVE_MAX_TRESH, +}; + +static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg) +{ + if (qavg < p->qth_min) + return RED_BELOW_MIN_THRESH; + else if (qavg >= p->qth_max) + return RED_ABOVE_MAX_TRESH; + else + return RED_BETWEEN_TRESH; +} + +enum { + RED_DONT_MARK, + RED_PROB_MARK, + RED_HARD_MARK, +}; + +static inline int red_action(struct red_parms *p, unsigned long qavg) +{ + switch (red_cmp_thresh(p, qavg)) { + case RED_BELOW_MIN_THRESH: + p->qcount = -1; + return RED_DONT_MARK; + + case RED_BETWEEN_TRESH: + if (++p->qcount) { + if (red_mark_probability(p, qavg)) { + p->qcount = 0; + p->qR = red_random(p); + return RED_PROB_MARK; + } + } else + p->qR = red_random(p); + + return RED_DONT_MARK; + + case RED_ABOVE_MAX_TRESH: + p->qcount = -1; + return RED_HARD_MARK; + } + + BUG(); + return RED_DONT_MARK; +} + +#endif diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index dc107ffad48..34a1a09e5ae 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -120,6 +120,7 @@ typedef union { int error; sctp_state_t state; sctp_event_timeout_t to; + unsigned long zero; void *ptr; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -148,17 +149,17 @@ static inline sctp_arg_t SCTP_NULL(void) } static inline sctp_arg_t SCTP_NOFORCE(void) { - sctp_arg_t retval; retval.i32 = 0; return retval; + sctp_arg_t retval = {.zero = 0UL}; retval.i32 = 0; return retval; } static inline sctp_arg_t SCTP_FORCE(void) { - sctp_arg_t retval; retval.i32 = 1; return retval; + sctp_arg_t retval = {.zero = 0UL}; retval.i32 = 1; return retval; } #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \ static inline sctp_arg_t \ SCTP_## name (type arg) \ -{ sctp_arg_t retval; retval.elt = arg; return retval; } +{ sctp_arg_t retval = {.zero = 0UL}; retval.elt = arg; return retval; } SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) SCTP_ARG_CONSTRUCTOR(U32, __u32, u32) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9c385b6417c..8e7794ee27f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -161,6 +161,13 @@ extern struct sctp_globals { */ int sndbuf_policy; + /* + * Policy for preforming sctp/socket accounting + * 0 - do socket level accounting, all assocs share sk_rcvbuf + * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes + */ + int rcvbuf_policy; + /* Delayed SACK timeout 200ms default*/ int sack_timeout; @@ -218,6 +225,7 @@ extern struct sctp_globals { #define sctp_cookie_preserve_enable (sctp_globals.cookie_preserve_enable) #define sctp_max_retrans_association (sctp_globals.max_retrans_association) #define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) +#define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy) #define sctp_max_retrans_path (sctp_globals.max_retrans_path) #define sctp_max_retrans_init (sctp_globals.max_retrans_init) #define sctp_sack_timeout (sctp_globals.sack_timeout) @@ -1222,11 +1230,11 @@ struct sctp_endpoint { int last_key; int key_changed_at; - /* Default timeouts. */ - int timeouts[SCTP_NUM_TIMEOUT_TYPES]; - /* sendbuf acct. policy. */ __u32 sndbuf_policy; + + /* rcvbuf acct. policy. */ + __u32 rcvbuf_policy; }; /* Recover the outter endpoint structure. */ @@ -1553,6 +1561,11 @@ struct sctp_association { */ int sndbuf_used; + /* This is the amount of memory that this association has allocated + * in the receive path at any given time. + */ + atomic_t rmem_alloc; + /* This is the wait queue head for send requests waiting on * the association sndbuf space. */ diff --git a/include/net/sock.h b/include/net/sock.h index e0498bd3600..982b4ecd187 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -461,16 +461,16 @@ static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -#define sk_add_backlog(__sk, __skb) \ -do { if (!(__sk)->sk_backlog.tail) { \ - (__sk)->sk_backlog.head = \ - (__sk)->sk_backlog.tail = (__skb); \ - } else { \ - ((__sk)->sk_backlog.tail)->next = (__skb); \ - (__sk)->sk_backlog.tail = (__skb); \ - } \ - (__skb)->next = NULL; \ -} while(0) +static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_backlog.tail) { + sk->sk_backlog.head = sk->sk_backlog.tail = skb; + } else { + sk->sk_backlog.tail->next = skb; + sk->sk_backlog.tail = skb; + } + skb->next = NULL; +} #define sk_wait_event(__sk, __timeo, __condition) \ ({ int rc; \ @@ -1247,6 +1247,12 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ skb = skb->next) +/*from STCP for fast SACK Process*/ +#define sk_stream_for_retrans_queue_from(skb, sk) \ + for (; (skb != (sk)->sk_send_head) && \ + (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ + skb = skb->next) + /* * Default write policy as shown to user space via poll/select/SIGIO */ diff --git a/include/net/tcp.h b/include/net/tcp.h index c24339c4e31..0f984801197 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/cache.h> #include <linux/percpu.h> +#include <linux/skbuff.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> @@ -88,10 +89,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCP_SYN_RETRIES 5 /* number of times to retry active opening a - * connection: ~180sec is RFC minumum */ + * connection: ~180sec is RFC minimum */ #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a - * connection: ~180sec is RFC minumum */ + * connection: ~180sec is RFC minimum */ #define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned @@ -179,7 +180,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ -#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ +#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ extern struct inet_timewait_death_row tcp_death_row; @@ -217,6 +218,7 @@ extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; +extern int sysctl_tcp_abc; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; @@ -550,13 +552,13 @@ extern u32 __tcp_select_window(struct sock *sk); /* TCP timestamps are only 32-bits, this causes a slight * complication on 64-bit systems since we store a snapshot - * of jiffies in the buffer control blocks below. We decidely + * of jiffies in the buffer control blocks below. We decidedly * only use of the low 32-bits of jiffies and hide the ugly * casts with the following macro. */ #define tcp_time_stamp ((__u32)(jiffies)) -/* This is what the send packet queueing engine uses to pass +/* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission * code. We also store the host-order sequence numbers in * here too. This is 36 bytes on 32-bit architectures, @@ -596,7 +598,7 @@ struct tcp_skb_cb { #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) -#define TCPCB_URG 0x20 /* Urgent pointer advenced here */ +#define TCPCB_URG 0x20 /* Urgent pointer advanced here */ #define TCPCB_AT_TAIL (TCPCB_URG) @@ -764,6 +766,33 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) (tp->snd_cwnd >> 2))); } +/* + * Linear increase during slow start + */ +static inline void tcp_slow_start(struct tcp_sock *tp) +{ + if (sysctl_tcp_abc) { + /* RFC3465: Slow Start + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (tp->bytes_acked < tp->mss_cache) + return; + + /* We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } + tp->bytes_acked = 0; + + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; +} + + static inline void tcp_sync_left_out(struct tcp_sock *tp) { if (tp->rx_opt.sack_ok && @@ -793,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; + tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { __tcp_enter_cwr(sk); tcp_set_ca_state(sk, TCP_CA_CWR); @@ -809,6 +839,27 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) return 3; } +/* RFC2861 Check whether we are limited by application or congestion window + * This is the inverse of cwnd check in tcp_tso_should_defer + */ +static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 left; + + if (in_flight >= tp->snd_cwnd) + return 1; + + if (!(sk->sk_route_caps & NETIF_F_TSO)) + return 0; + + left = tp->snd_cwnd - in_flight; + if (sysctl_tcp_tso_win_divisor) + return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd; + else + return left <= tcp_max_burst(tp); +} + static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, const struct sk_buff *skb) { @@ -852,7 +903,7 @@ static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) { - return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + return __skb_checksum_complete(skb); } static __inline__ int tcp_checksum_complete(struct sk_buff *skb) @@ -1156,6 +1207,15 @@ static inline void tcp_mib_init(void) TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1); } +/*from STCP */ +static inline void clear_all_retrans_hints(struct tcp_sock *tp){ + tp->lost_skb_hint = NULL; + tp->scoreboard_skb_hint = NULL; + tp->retransmit_skb_hint = NULL; + tp->forward_skb_hint = NULL; + tp->fastpath_skb_hint = NULL; +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, |