diff options
Diffstat (limited to 'net')
453 files changed, 24549 insertions, 10512 deletions
diff --git a/net/802/hippi.c b/net/802/hippi.c index 6d7fed3dd99..579e2ddf5eb 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -36,7 +36,6 @@ #include <net/arp.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/checksum.h> #include <asm/system.h> /* diff --git a/net/Kconfig b/net/Kconfig index a81aca43932..7dfc9492069 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -63,6 +63,7 @@ config INET if INET source "net/ipv4/Kconfig" source "net/ipv6/Kconfig" +source "net/netlabel/Kconfig" endif # if INET @@ -74,7 +75,7 @@ config NETWORK_SECMARK If you are unsure how to answer this question, answer N. menuconfig NETFILTER - bool "Network packet filtering (replaces ipchains)" + bool "Network packet filtering framework (Netfilter)" ---help--- Netfilter is a framework for filtering and mangling network packets that pass through your Linux box. @@ -174,33 +175,6 @@ source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" - -config NET_DIVERT - bool "Frame Diverter (EXPERIMENTAL)" - depends on EXPERIMENTAL && BROKEN - ---help--- - The Frame Diverter allows you to divert packets from the - network, that are not aimed at the interface receiving it (in - promisc. mode). Typically, a Linux box setup as an Ethernet bridge - with the Frames Diverter on, can do some *really* transparent www - caching using a Squid proxy for example. - - This is very useful when you don't want to change your router's - config (or if you simply don't have access to it). - - The other possible usages of diverting Ethernet Frames are - numberous: - - reroute smtp traffic to another interface - - traffic-shape certain network streams - - transparently proxy smtp connections - - etc... - - For more informations, please refer to: - <http://diverter.sourceforge.net/> - <http://perso.wanadoo.fr/magpie/EtherDivert.html> - - If unsure, say N. - source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" @@ -249,8 +223,6 @@ source "net/ieee80211/Kconfig" config WIRELESS_EXT bool -source "net/netlabel/Kconfig" - config FIB_RULES bool diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 708e2e0371a..3a705220770 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -61,6 +61,7 @@ #include <net/tcp_states.h> #include <net/route.h> #include <linux/atalk.h> +#include "../core/kmap_skb.h" struct datalink_proto *ddp_dl, *aarp_dl; static const struct proto_ops atalk_dgram_ops; @@ -1584,7 +1585,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (usat->sat_addr.s_net || usat->sat_addr.s_node == ATADDR_ANYNODE) { rt = atrtr_find(&usat->sat_addr); - dev = rt->dev; } else { struct atalk_addr at_hint; @@ -1592,7 +1592,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr at_hint.s_net = at->src_net; rt = atrtr_find(&at_hint); - dev = rt->dev; } if (!rt) return -ENETUNREACH; diff --git a/net/atm/Makefile b/net/atm/Makefile index 89656d6c0b9..cc50bd1ff1d 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -7,10 +7,7 @@ mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o obj-$(CONFIG_ATM) += atm.o obj-$(CONFIG_ATM_CLIP) += clip.o -atm-$(subst m,y,$(CONFIG_ATM_CLIP)) += ipcommon.o obj-$(CONFIG_ATM_BR2684) += br2684.o -atm-$(subst m,y,$(CONFIG_ATM_BR2684)) += ipcommon.o -atm-$(subst m,y,$(CONFIG_NET_SCH_ATM)) += ipcommon.o atm-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_ATM_LANE) += lec.o diff --git a/net/atm/br2684.c b/net/atm/br2684.c index d00cca97eb3..83a1c1b1d6c 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -23,7 +23,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary #include <linux/atmbr2684.h> #include "common.h" -#include "ipcommon.h" /* * Define this to use a version of the code which interacts with the higher @@ -372,7 +371,7 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg) /* Returns 1 if packet should be dropped */ static inline int -packet_fails_filter(u16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) +packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) { if (brvcc->filter.netmask == 0) return 0; /* no filter in place */ @@ -500,11 +499,12 @@ Note: we do not have explicit unassign, but look at _push() */ int err; struct br2684_vcc *brvcc; - struct sk_buff_head copy; struct sk_buff *skb; + struct sk_buff_head *rq; struct br2684_dev *brdev; struct net_device *net_dev; struct atm_backend_br2684 be; + unsigned long flags; if (copy_from_user(&be, arg, sizeof be)) return -EFAULT; @@ -554,12 +554,30 @@ Note: we do not have explicit unassign, but look at _push() brvcc->old_push = atmvcc->push; barrier(); atmvcc->push = br2684_push; - skb_queue_head_init(©); - skb_migrate(&sk_atm(atmvcc)->sk_receive_queue, ©); - while ((skb = skb_dequeue(©)) != NULL) { + + rq = &sk_atm(atmvcc)->sk_receive_queue; + + spin_lock_irqsave(&rq->lock, flags); + if (skb_queue_empty(rq)) { + skb = NULL; + } else { + /* NULL terminate the list. */ + rq->prev->next = NULL; + skb = rq->next; + } + rq->prev = rq->next = (struct sk_buff *)rq; + rq->qlen = 0; + spin_unlock_irqrestore(&rq->lock, flags); + + while (skb) { + struct sk_buff *next = skb->next; + + skb->next = skb->prev = NULL; BRPRIV(skb->dev)->stats.rx_bytes -= skb->len; BRPRIV(skb->dev)->stats.rx_packets--; br2684_push(atmvcc, skb); + + skb = next; } __module_get(THIS_MODULE); return 0; diff --git a/net/atm/clip.c b/net/atm/clip.c index 7af2c411da8..5f8a1d22272 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -38,7 +38,6 @@ #include "common.h" #include "resources.h" -#include "ipcommon.h" #include <net/atmclip.h> @@ -54,7 +53,7 @@ static struct atm_vcc *atmarpd; static struct neigh_table clip_tbl; static struct timer_list idle_timer; -static int to_atmarpd(enum atmarp_ctrl_type type, int itf, unsigned long ip) +static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; @@ -220,7 +219,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) || memcmp(skb->data, llc_oui, sizeof (llc_oui))) skb->protocol = htons(ETH_P_IP); else { - skb->protocol = ((u16 *) skb->data)[3]; + skb->protocol = ((__be16 *) skb->data)[3]; skb_pull(skb, RFC1483LLC_LEN); if (skb->protocol == htons(ETH_P_ARP)) { PRIV(skb->dev)->stats.rx_packets++; @@ -430,7 +429,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) here = skb_push(skb, RFC1483LLC_LEN); memcpy(here, llc_oui, sizeof(llc_oui)); - ((u16 *) here)[3] = skb->protocol; + ((__be16 *) here)[3] = skb->protocol; } atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; @@ -469,8 +468,9 @@ static struct net_device_stats *clip_get_stats(struct net_device *dev) static int clip_mkip(struct atm_vcc *vcc, int timeout) { struct clip_vcc *clip_vcc; - struct sk_buff_head copy; struct sk_buff *skb; + struct sk_buff_head *rq; + unsigned long flags; if (!vcc->push) return -EBADFD; @@ -490,10 +490,26 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) clip_vcc->old_pop = vcc->pop; vcc->push = clip_push; vcc->pop = clip_pop; - skb_queue_head_init(©); - skb_migrate(&sk_atm(vcc)->sk_receive_queue, ©); + + rq = &sk_atm(vcc)->sk_receive_queue; + + spin_lock_irqsave(&rq->lock, flags); + if (skb_queue_empty(rq)) { + skb = NULL; + } else { + /* NULL terminate the list. */ + rq->prev->next = NULL; + skb = rq->next; + } + rq->prev = rq->next = (struct sk_buff *)rq; + rq->qlen = 0; + spin_unlock_irqrestore(&rq->lock, flags); + /* re-process everything received between connection setup and MKIP */ - while ((skb = skb_dequeue(©)) != NULL) + while (skb) { + struct sk_buff *next = skb->next; + + skb->next = skb->prev = NULL; if (!clip_devs) { atm_return(vcc, skb->truesize); kfree_skb(skb); @@ -506,10 +522,13 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) PRIV(skb->dev)->stats.rx_bytes -= len; kfree_skb(skb); } + + skb = next; + } return 0; } -static int clip_setentry(struct atm_vcc *vcc, u32 ip) +static int clip_setentry(struct atm_vcc *vcc, __be32 ip) { struct neighbour *neigh; struct atmarp_entry *entry; @@ -752,7 +771,7 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_mkip(vcc, arg); break; case ATMARP_SETENTRY: - err = clip_setentry(vcc, arg); + err = clip_setentry(vcc, (__force __be32)arg); break; case ATMARP_ENCAP: err = clip_encap(vcc, arg); diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c deleted file mode 100644 index 1d3de42fada..00000000000 --- a/net/atm/ipcommon.c +++ /dev/null @@ -1,63 +0,0 @@ -/* net/atm/ipcommon.c - Common items for all ways of doing IP over ATM */ - -/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/in.h> -#include <linux/atmdev.h> -#include <linux/atmclip.h> - -#include "common.h" -#include "ipcommon.h" - - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - -/* - * skb_migrate appends the list at "from" to "to", emptying "from" in the - * process. skb_migrate is atomic with respect to all other skb operations on - * "from" and "to". Note that it locks both lists at the same time, so to deal - * with the lock ordering, the locks are taken in address order. - * - * This function should live in skbuff.c or skbuff.h. - */ - - -void skb_migrate(struct sk_buff_head *from, struct sk_buff_head *to) -{ - unsigned long flags; - struct sk_buff *skb_from = (struct sk_buff *) from; - struct sk_buff *skb_to = (struct sk_buff *) to; - struct sk_buff *prev; - - if ((unsigned long) from < (unsigned long) to) { - spin_lock_irqsave(&from->lock, flags); - spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING); - } else { - spin_lock_irqsave(&to->lock, flags); - spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING); - } - prev = from->prev; - from->next->prev = to->prev; - prev->next = skb_to; - to->prev->next = from->next; - to->prev = from->prev; - to->qlen += from->qlen; - spin_unlock(&to->lock); - from->prev = skb_from; - from->next = skb_from; - from->qlen = 0; - spin_unlock_irqrestore(&from->lock, flags); -} - - -EXPORT_SYMBOL(skb_migrate); diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h deleted file mode 100644 index d72165f6093..00000000000 --- a/net/atm/ipcommon.h +++ /dev/null @@ -1,22 +0,0 @@ -/* net/atm/ipcommon.h - Common items for all ways of doing IP over ATM */ - -/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef NET_ATM_IPCOMMON_H -#define NET_ATM_IPCOMMON_H - - -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/atmdev.h> - -/* - * Appends all skbs from "from" to "to". The operation is atomic with respect - * to all other skb operations on "from" or "to". - */ - -void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to); - -#endif diff --git a/net/atm/lec.c b/net/atm/lec.c index 66c57c1091a..3fc0abeeaf3 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -204,9 +204,9 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) memset(rdesc, 0, ETH_ALEN); /* offset 4 comes from LAN destination field in LE control frames */ if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT)) - memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(uint16_t)); + memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(__be16)); else { - memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t)); + memcpy(&rdesc[4], &trh->rseg[1], sizeof(__be16)); rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0)); } @@ -775,7 +775,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char *src, *dst; atm_return(vcc, skb->truesize); - if (*(uint16_t *) skb->data == htons(priv->lecid) || + if (*(__be16 *) skb->data == htons(priv->lecid) || !priv->lecd || !(dev->flags & IFF_UP)) { /* * Probably looping back, or if lecd is missing, @@ -1321,11 +1321,10 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, if (table == NULL) return -1; - *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); + *tlvs = kmemdup(table->tlvs, table->sizeoftlvs, GFP_ATOMIC); if (*tlvs == NULL) return -1; - memcpy(*tlvs, table->tlvs, table->sizeoftlvs); *sizeoftlvs = table->sizeoftlvs; return 0; @@ -1364,11 +1363,10 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, kfree(priv->tlvs); /* NULL if there was no previous association */ - priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + priv->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL); if (priv->tlvs == NULL) return (0); priv->sizeoftlvs = sizeoftlvs; - memcpy(priv->tlvs, tlvs, sizeoftlvs); skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); if (skb == NULL) @@ -1409,12 +1407,10 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, kfree(entry->tlvs); - entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + entry->tlvs = kmemdup(tlvs, sizeoftlvs, GFP_KERNEL); if (entry->tlvs == NULL) return; - entry->sizeoftlvs = sizeoftlvs; - memcpy(entry->tlvs, tlvs, sizeoftlvs); #endif #if 0 printk("lec.c: lane2_associate_ind()\n"); @@ -1458,7 +1454,7 @@ static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, #define LEC_ARP_REFRESH_INTERVAL (3*HZ) -static void lec_arp_check_expire(void *data); +static void lec_arp_check_expire(struct work_struct *work); static void lec_arp_expire_arp(unsigned long data); /* @@ -1481,7 +1477,7 @@ static void lec_arp_init(struct lec_priv *priv) INIT_HLIST_HEAD(&priv->lec_no_forward); INIT_HLIST_HEAD(&priv->mcast_fwds); spin_lock_init(&priv->lec_arp_lock); - INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv); + INIT_DELAYED_WORK(&priv->lec_arp_work, lec_arp_check_expire); schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); } @@ -1879,10 +1875,11 @@ static void lec_arp_expire_vcc(unsigned long data) * to ESI_FORWARD_DIRECT. This causes the flush period to end * regardless of the progress of the flush protocol. */ -static void lec_arp_check_expire(void *data) +static void lec_arp_check_expire(struct work_struct *work) { unsigned long flags; - struct lec_priv *priv = data; + struct lec_priv *priv = + container_of(work, struct lec_priv, lec_arp_work.work); struct hlist_node *node, *next; struct lec_arp_table *entry; unsigned long now; diff --git a/net/atm/lec.h b/net/atm/lec.h index 877f5093969..99136babd53 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -14,14 +14,14 @@ #define LEC_HEADER_LEN 16 struct lecdatahdr_8023 { - unsigned short le_header; + __be16 le_header; unsigned char h_dest[ETH_ALEN]; unsigned char h_source[ETH_ALEN]; - unsigned short h_type; + __be16 h_type; }; struct lecdatahdr_8025 { - unsigned short le_header; + __be16 le_header; unsigned char ac_pad; unsigned char fc; unsigned char h_dest[ETH_ALEN]; @@ -92,7 +92,7 @@ struct lec_priv { spinlock_t lec_arp_lock; struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ struct atm_vcc *lecd; - struct work_struct lec_arp_work; /* C10 */ + struct delayed_work lec_arp_work; /* C10 */ unsigned int maximum_unknown_frame_count; /* * Within the period of time defined by this variable, the client will send diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 0d2b994af51..c18f73715ef 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -152,7 +152,7 @@ static struct mpoa_client *find_mpc_by_lec(struct net_device *dev) /* * Overwrites the old entry or makes a new one. */ -struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos) +struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos) { struct atm_mpoa_qos *entry; @@ -177,7 +177,7 @@ struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos) return entry; } -struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip) +struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip) { struct atm_mpoa_qos *qos; @@ -460,11 +460,11 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) in_cache_entry *entry; struct iphdr *iph; char *buff; - uint32_t ipaddr = 0; + __be32 ipaddr = 0; static struct { struct llc_snap_hdr hdr; - uint32_t tag; + __be32 tag; } tagged_llc_snap_hdr = { {0xaa, 0xaa, 0x03, {0x00, 0x00, 0x00}, {0x88, 0x4c}}, 0 @@ -559,7 +559,7 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) struct mpoa_client *mpc; struct atmmpc_ioc ioc_data; in_cache_entry *in_entry; - uint32_t ipaddr; + __be32 ipaddr; bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc)); if (bytes_left != 0) { @@ -638,7 +638,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) struct sk_buff *new_skb; eg_cache_entry *eg; struct mpoa_client *mpc; - uint32_t tag; + __be32 tag; char *tmp; ddprintk("mpoa: (%s) mpc_push:\n", dev->name); @@ -683,7 +683,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) } tmp = skb->data + sizeof(struct llc_snap_hdr); - tag = *(uint32_t *)tmp; + tag = *(__be32 *)tmp; eg = mpc->eg_ops->get_by_tag(tag, mpc); if (eg == NULL) { @@ -1029,7 +1029,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - uint32_t dst_ip = msg->content.in_info.in_dst_ip; + __be32 dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry; entry = mpc->in_ops->get(dst_ip, mpc); @@ -1066,7 +1066,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) */ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry) { - uint32_t dst_ip = msg->content.in_info.in_dst_ip; + __be32 dst_ip = msg->content.in_info.in_dst_ip; struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client); @@ -1102,7 +1102,7 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - uint32_t dst_ip = msg->content.in_info.in_dst_ip; + __be32 dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); @@ -1148,8 +1148,8 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - uint32_t dst_ip = msg->content.in_info.in_dst_ip; - uint32_t mask = msg->ip_mask; + __be32 dst_ip = msg->content.in_info.in_dst_ip; + __be32 mask = msg->ip_mask; in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); if(entry == NULL){ @@ -1173,7 +1173,7 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - uint32_t cache_id = msg->content.eg_info.cache_id; + __be32 cache_id = msg->content.eg_info.cache_id; eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc); if (entry == NULL) { @@ -1322,13 +1322,12 @@ static void set_mps_mac_addr_rcvd(struct k_message *msg, struct mpoa_client *cli if(client->number_of_mps_macs) kfree(client->mps_macs); client->number_of_mps_macs = 0; - client->mps_macs = kmalloc(ETH_ALEN,GFP_KERNEL); + client->mps_macs = kmemdup(msg->MPS_ctrl, ETH_ALEN, GFP_KERNEL); if (client->mps_macs == NULL) { printk("mpoa: set_mps_mac_addr_rcvd: out of memory\n"); return; } client->number_of_mps_macs = 1; - memcpy(client->mps_macs, msg->MPS_ctrl, ETH_ALEN); return; } diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 3c7981a229e..51f460d005c 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -36,14 +36,14 @@ struct mpoa_client { struct atm_mpoa_qos { struct atm_mpoa_qos *next; - uint32_t ipaddr; + __be32 ipaddr; struct atm_qos qos; }; /* MPOA QoS operations */ -struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos); -struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip); +struct atm_mpoa_qos *atm_mpoa_add_qos(__be32 dst_ip, struct atm_qos *qos); +struct atm_mpoa_qos *atm_mpoa_search_qos(__be32 dst_ip); int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); /* Display QoS entries. This is for the procfs */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index fbf13cdcf46..697a081533b 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -22,7 +22,7 @@ #define ddprintk(format,args...) #endif -static in_cache_entry *in_cache_get(uint32_t dst_ip, +static in_cache_entry *in_cache_get(__be32 dst_ip, struct mpoa_client *client) { in_cache_entry *entry; @@ -42,9 +42,9 @@ static in_cache_entry *in_cache_get(uint32_t dst_ip, return NULL; } -static in_cache_entry *in_cache_get_with_mask(uint32_t dst_ip, +static in_cache_entry *in_cache_get_with_mask(__be32 dst_ip, struct mpoa_client *client, - uint32_t mask) + __be32 mask) { in_cache_entry *entry; @@ -84,10 +84,10 @@ static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc, return NULL; } -static in_cache_entry *in_cache_add_entry(uint32_t dst_ip, +static in_cache_entry *in_cache_add_entry(__be32 dst_ip, struct mpoa_client *client) { - in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL); + in_cache_entry *entry = kzalloc(sizeof(in_cache_entry), GFP_KERNEL); if (entry == NULL) { printk("mpoa: mpoa_caches.c: new_in_cache_entry: out of memory\n"); @@ -95,7 +95,6 @@ static in_cache_entry *in_cache_add_entry(uint32_t dst_ip, } dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip)); - memset(entry,0,sizeof(in_cache_entry)); atomic_set(&entry->use, 1); dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n"); @@ -319,7 +318,7 @@ static void in_destroy_cache(struct mpoa_client *mpc) return; } -static eg_cache_entry *eg_cache_get_by_cache_id(uint32_t cache_id, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id, struct mpoa_client *mpc) { eg_cache_entry *entry; @@ -339,7 +338,7 @@ static eg_cache_entry *eg_cache_get_by_cache_id(uint32_t cache_id, struct mpoa_c } /* This can be called from any context since it saves CPU flags */ -static eg_cache_entry *eg_cache_get_by_tag(uint32_t tag, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_tag(__be32 tag, struct mpoa_client *mpc) { unsigned long flags; eg_cache_entry *entry; @@ -380,7 +379,7 @@ static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, struct mpoa_clie return NULL; } -static eg_cache_entry *eg_cache_get_by_src_ip(uint32_t ipaddr, struct mpoa_client *mpc) +static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, struct mpoa_client *mpc) { eg_cache_entry *entry; @@ -447,7 +446,7 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client) { - eg_cache_entry *entry = kmalloc(sizeof(eg_cache_entry), GFP_KERNEL); + eg_cache_entry *entry = kzalloc(sizeof(eg_cache_entry), GFP_KERNEL); if (entry == NULL) { printk("mpoa: mpoa_caches.c: new_eg_cache_entry: out of memory\n"); @@ -455,7 +454,6 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli } dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(msg->content.eg_info.eg_dst_ip)); - memset(entry, 0, sizeof(eg_cache_entry)); atomic_set(&entry->use, 1); dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: about to lock\n"); diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h index 6c9886a03d0..84de977def2 100644 --- a/net/atm/mpoa_caches.h +++ b/net/atm/mpoa_caches.h @@ -29,12 +29,12 @@ typedef struct in_cache_entry { } in_cache_entry; struct in_cache_ops{ - in_cache_entry *(*add_entry)(uint32_t dst_ip, + in_cache_entry *(*add_entry)(__be32 dst_ip, struct mpoa_client *client); - in_cache_entry *(*get)(uint32_t dst_ip, struct mpoa_client *client); - in_cache_entry *(*get_with_mask)(uint32_t dst_ip, + in_cache_entry *(*get)(__be32 dst_ip, struct mpoa_client *client); + in_cache_entry *(*get_with_mask)(__be32 dst_ip, struct mpoa_client *client, - uint32_t mask); + __be32 mask); in_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, struct mpoa_client *client); void (*put)(in_cache_entry *entry); @@ -56,17 +56,17 @@ typedef struct eg_cache_entry{ struct atm_vcc *shortcut; uint32_t packets_rcvd; uint16_t entry_state; - uint32_t latest_ip_addr; /* The src IP address of the last packet */ + __be32 latest_ip_addr; /* The src IP address of the last packet */ struct eg_ctrl_info ctrl_info; atomic_t use; } eg_cache_entry; struct eg_cache_ops{ eg_cache_entry *(*add_entry)(struct k_message *msg, struct mpoa_client *client); - eg_cache_entry *(*get_by_cache_id)(uint32_t cache_id, struct mpoa_client *client); - eg_cache_entry *(*get_by_tag)(uint32_t cache_id, struct mpoa_client *client); + eg_cache_entry *(*get_by_cache_id)(__be32 cache_id, struct mpoa_client *client); + eg_cache_entry *(*get_by_tag)(__be32 cache_id, struct mpoa_client *client); eg_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, struct mpoa_client *client); - eg_cache_entry *(*get_by_src_ip)(uint32_t ipaddr, struct mpoa_client *client); + eg_cache_entry *(*get_by_src_ip)(__be32 ipaddr, struct mpoa_client *client); void (*put)(eg_cache_entry *entry); void (*remove_entry)(eg_cache_entry *entry, struct mpoa_client *client); void (*update)(eg_cache_entry *entry, uint16_t holding_time); diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index d37b8911b3a..3844c85d602 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -231,14 +231,14 @@ static int parse_qos(const char *buff) */ unsigned char ip[4]; int tx_pcr, tx_sdu, rx_pcr, rx_sdu; - uint32_t ipaddr; + __be32 ipaddr; struct atm_qos qos; memset(&qos, 0, sizeof(struct atm_qos)); if (sscanf(buff, "del %hhu.%hhu.%hhu.%hhu", ip, ip+1, ip+2, ip+3) == 4) { - ipaddr = *(uint32_t *)ip; + ipaddr = *(__be32 *)ip; return atm_mpoa_delete_qos(atm_mpoa_search_qos(ipaddr)); } @@ -250,7 +250,7 @@ static int parse_qos(const char *buff) ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu, &rx_pcr, &rx_sdu) != 8) return 0; - ipaddr = *(uint32_t *)ip; + ipaddr = *(__be32 *)ip; qos.txtp.traffic_class = ATM_CBR; qos.txtp.max_pcr = tx_pcr; qos.txtp.max_sdu = tx_sdu; diff --git a/net/atm/proc.c b/net/atm/proc.c index 91fe5f53ff1..739866bfe9e 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -393,7 +393,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, if (count == 0) return 0; page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE(file->f_dentry->d_inode)->data; + dev = PDE(file->f_path.dentry->d_inode)->data; if (!dev->ops->proc_read) length = -EINVAL; else { diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 000695c4858..6cabf6d8a75 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -906,13 +906,13 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) ax25->source_addr = oax25->source_addr; if (oax25->digipeat != NULL) { - if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { + ax25->digipeat = kmemdup(oax25->digipeat, sizeof(ax25_digi), + GFP_ATOMIC); + if (ax25->digipeat == NULL) { sk_free(sk); ax25_cb_put(ax25); return NULL; } - - memcpy(ax25->digipeat, oax25->digipeat, sizeof(ax25_digi)); } sk->sk_protinfo = ax25; diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index 5f0896ad004..97a49c79c60 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -29,17 +29,26 @@ #include <linux/interrupt.h> /* - * The null address is defined as a callsign of all spaces with an - * SSID of zero. + * The default broadcast address of an interface is QST-0; the default address + * is LINUX-1. The null address is defined as a callsign of all spaces with + * an SSID of zero. */ -ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}}; +const ax25_address ax25_bcast = + {{'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; +const ax25_address ax25_defaddr = + {{'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, 1 << 1}}; +const ax25_address null_ax25_address = + {{' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; + +EXPORT_SYMBOL_GPL(ax25_bcast); +EXPORT_SYMBOL_GPL(ax25_defaddr); EXPORT_SYMBOL(null_ax25_address); /* * ax25 -> ascii conversion */ -char *ax2asc(char *buf, ax25_address *a) +char *ax2asc(char *buf, const ax25_address *a) { char c, *s; int n; @@ -72,9 +81,9 @@ EXPORT_SYMBOL(ax2asc); /* * ascii -> ax25 conversion */ -void asc2ax(ax25_address *addr, char *callsign) +void asc2ax(ax25_address *addr, const char *callsign) { - char *s; + const char *s; int n; for (s = callsign, n = 0; n < 6; n++) { @@ -107,7 +116,7 @@ EXPORT_SYMBOL(asc2ax); /* * Compare two ax.25 addresses */ -int ax25cmp(ax25_address *a, ax25_address *b) +int ax25cmp(const ax25_address *a, const ax25_address *b) { int ct = 0; @@ -128,7 +137,7 @@ EXPORT_SYMBOL(ax25cmp); /* * Compare two AX.25 digipeater paths. */ -int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2) +int ax25digicmp(const ax25_digi *digi1, const ax25_digi *digi2) { int i; @@ -149,7 +158,9 @@ int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2) * Given an AX.25 address pull of to, from, digi list, command/response and the start of data * */ -unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, int *dama) +const unsigned char *ax25_addr_parse(const unsigned char *buf, int len, + ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, + int *dama) { int d = 0; @@ -204,7 +215,8 @@ unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, a /* * Assemble an AX.25 header from the bits */ -int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, ax25_digi *d, int flag, int modulus) +int ax25_addr_build(unsigned char *buf, const ax25_address *src, + const ax25_address *dest, const ax25_digi *d, int flag, int modulus) { int len = 0; int ct = 0; @@ -261,7 +273,7 @@ int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, a return len; } -int ax25_addr_size(ax25_digi *dp) +int ax25_addr_size(const ax25_digi *dp) { if (dp == NULL) return 2 * AX25_ADDR_LEN; @@ -272,7 +284,7 @@ int ax25_addr_size(ax25_digi *dp) /* * Reverse Digipeat List. May not pass both parameters as same struct */ -void ax25_digi_invert(ax25_digi *in, ax25_digi *out) +void ax25_digi_invert(const ax25_digi *in, ax25_digi *out) { int ct; diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index d7736e58533..f84047d1e8c 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -70,11 +70,11 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2 ax25->dest_addr = *dest; if (digi != NULL) { - if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { + ax25->digipeat = kmemdup(digi, sizeof(*digi), GFP_ATOMIC); + if (ax25->digipeat == NULL) { ax25_cb_put(ax25); return NULL; } - memcpy(ax25->digipeat, digi, sizeof(ax25_digi)); } switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 51b7bdaf27e..8580356ace5 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -432,11 +432,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } if (ax25_rt->digipeat != NULL) { - if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { + ax25->digipeat = kmemdup(ax25_rt->digipeat, sizeof(ax25_digi), + GFP_ATOMIC); + if (ax25->digipeat == NULL) { err = -ENOMEM; goto put; } - memcpy(ax25->digipeat, ax25_rt->digipeat, sizeof(ax25_digi)); ax25_adjust_path(addr, ax25->digipeat); } diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 867d4253797..d23a27f25d2 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -209,7 +209,9 @@ void ax25_register_sysctl(void) } for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) { - ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC); + struct ctl_table *child = kmemdup(ax25_param_table, + sizeof(ax25_param_table), + GFP_ATOMIC); if (!child) { while (n--) kfree(ax25_table[n].child); @@ -217,7 +219,6 @@ void ax25_register_sysctl(void) spin_unlock_bh(&ax25_dev_lock); return; } - memcpy(child, ax25_param_table, sizeof(ax25_param_table)); ax25_table[n].child = ax25_dev->systable = child; ax25_table[n].ctl_name = n + 1; ax25_table[n].procname = ax25_dev->dev->name; diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index bbb1ed7097a..0b6cd0e2528 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -95,14 +95,14 @@ struct bnep_setup_conn_req { struct bnep_set_filter_req { __u8 type; __u8 ctrl; - __u16 len; + __be16 len; __u8 list[0]; } __attribute__((packed)); struct bnep_control_rsp { __u8 type; __u8 ctrl; - __u16 resp; + __be16 resp; } __attribute__((packed)); struct bnep_ext_hdr { diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 4d3424c2421..7ba6470dc50 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -117,18 +117,18 @@ static int bnep_send_rsp(struct bnep_session *s, u8 ctrl, u16 resp) static inline void bnep_set_default_proto_filter(struct bnep_session *s) { /* (IPv4, ARP) */ - s->proto_filter[0].start = htons(0x0800); - s->proto_filter[0].end = htons(0x0806); + s->proto_filter[0].start = ETH_P_IP; + s->proto_filter[0].end = ETH_P_ARP; /* (RARP, AppleTalk) */ - s->proto_filter[1].start = htons(0x8035); - s->proto_filter[1].end = htons(0x80F3); + s->proto_filter[1].start = ETH_P_RARP; + s->proto_filter[1].end = ETH_P_AARP; /* (IPX, IPv6) */ - s->proto_filter[2].start = htons(0x8137); - s->proto_filter[2].end = htons(0x86DD); + s->proto_filter[2].start = ETH_P_IPX; + s->proto_filter[2].end = ETH_P_IPV6; } #endif -static int bnep_ctrl_set_netfilter(struct bnep_session *s, u16 *data, int len) +static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len) { int n; @@ -150,8 +150,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, u16 *data, int len) int i; for (i = 0; i < n; i++) { - f[i].start = get_unaligned(data++); - f[i].end = get_unaligned(data++); + f[i].start = ntohs(get_unaligned(data++)); + f[i].end = ntohs(get_unaligned(data++)); BT_DBG("proto filter start %d end %d", f[i].start, f[i].end); @@ -180,7 +180,7 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len) if (len < 2) return -EILSEQ; - n = ntohs(get_unaligned((u16 *) data)); + n = ntohs(get_unaligned((__be16 *) data)); data += 2; len -= 2; if (len < n) @@ -332,7 +332,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) goto badframe; - s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2)); + s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); if (type & BNEP_EXT_HEADER) { if (bnep_rx_extension(s, skb) < 0) @@ -343,7 +343,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) if (ntohs(s->eh.h_proto) == 0x8100) { if (!skb_pull(skb, 4)) goto badframe; - s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2)); + s->eh.h_proto = get_unaligned((__be16 *) (skb->data - 2)); } /* We have to alloc new skb and copy data here :(. Because original skb @@ -365,7 +365,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) case BNEP_COMPRESSED_SRC_ONLY: memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN); memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN); - put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2)); + put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; case BNEP_COMPRESSED_DST_ONLY: @@ -375,7 +375,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) case BNEP_GENERAL: memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2); - put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2)); + put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); break; } diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 7f7b27db6a8..67a002a9751 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -158,14 +158,15 @@ static inline int bnep_net_mc_filter(struct sk_buff *skb, struct bnep_session *s static inline u16 bnep_net_eth_proto(struct sk_buff *skb) { struct ethhdr *eh = (void *) skb->data; + u16 proto = ntohs(eh->h_proto); - if (ntohs(eh->h_proto) >= 1536) - return eh->h_proto; + if (proto >= 1536) + return proto; - if (get_unaligned((u16 *) skb->data) == 0xFFFF) - return htons(ETH_P_802_3); + if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) + return ETH_P_802_3; - return htons(ETH_P_802_2); + return ETH_P_802_2; } static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session *s) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 65f09484571..bb94e6da223 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -57,6 +57,7 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) { __u8 status; + struct hci_conn *pend; BT_DBG("%s ocf 0x%x", hdev->name, ocf); @@ -71,6 +72,15 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb clear_bit(HCI_INQUIRY, &hdev->flags); hci_req_complete(hdev, status); } + + hci_dev_lock(hdev); + + pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); + if (pend) + hci_acl_connect(pend); + + hci_dev_unlock(hdev); + break; default: @@ -565,11 +575,20 @@ static void hci_cs_info_param(struct hci_dev *hdev, __u16 ocf, __u8 status) static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); + struct hci_conn *pend; BT_DBG("%s status %d", hdev->name, status); clear_bit(HCI_INQUIRY, &hdev->flags); hci_req_complete(hdev, status); + + hci_dev_lock(hdev); + + pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); + if (pend) + hci_acl_connect(pend); + + hci_dev_unlock(hdev); } /* Inquiry Result */ diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f26a9eb4994..dbf98c49dba 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -120,10 +120,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (!hci_test_bit(evt, &flt->event_mask)) continue; - if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE && - flt->opcode != *(__u16 *)(skb->data + 3)) || - (evt == HCI_EV_CMD_STATUS && - flt->opcode != *(__u16 *)(skb->data + 4)))) + if (flt->opcode && + ((evt == HCI_EV_CMD_COMPLETE && + flt->opcode != + get_unaligned((__le16 *)(skb->data + 3))) || + (evt == HCI_EV_CMD_STATUS && + flt->opcode != + get_unaligned((__le16 *)(skb->data + 4))))) continue; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 954eb74eb37..d4c935692cc 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -237,9 +237,9 @@ static void bt_release(struct device *dev) kfree(data); } -static void add_conn(void *data) +static void add_conn(struct work_struct *work) { - struct hci_conn *conn = data; + struct hci_conn *conn = container_of(work, struct hci_conn, work); int i; if (device_register(&conn->dev) < 0) { @@ -259,7 +259,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) BT_DBG("conn %p", conn); - conn->dev.parent = &hdev->dev; + conn->dev.bus = &bt_bus; + conn->dev.parent = &hdev->dev; + conn->dev.release = bt_release; snprintf(conn->dev.bus_id, BUS_ID_SIZE, @@ -270,14 +272,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn) dev_set_drvdata(&conn->dev, conn); - INIT_WORK(&conn->work, add_conn, (void *) conn); + INIT_WORK(&conn->work, add_conn); schedule_work(&conn->work); } -static void del_conn(void *data) +static void del_conn(struct work_struct *work) { - struct hci_conn *conn = data; + struct hci_conn *conn = container_of(work, struct hci_conn, work); device_del(&conn->dev); } @@ -285,7 +287,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn) { BT_DBG("conn %p", conn); - INIT_WORK(&conn->work, del_conn, (void *) conn); + INIT_WORK(&conn->work, del_conn); schedule_work(&conn->work); } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 2b3dcb8f90f..29a8fa4d372 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -770,7 +770,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl long timeo; int err = 0; - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_state != BT_LISTEN) { err = -EBADFD; @@ -792,7 +792,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl release_sock(sk); timeo = schedule_timeout(timeo); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_state != BT_LISTEN) { err = -EBADFD; @@ -1353,12 +1353,12 @@ static inline int l2cap_conf_output(struct sock *sk, void **ptr) /* Configure output options and let the other side know * which ones we don't like. */ - if (pi->conf_mtu < pi->omtu) { - l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu); + if (pi->conf_mtu < pi->omtu) result = L2CAP_CONF_UNACCEPT; - } else { + else pi->omtu = pi->conf_mtu; - } + + l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu); BT_DBG("sk %p result %d", sk, result); return result; @@ -1533,6 +1533,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid))) return -ENOENT; + if (sk->sk_state == BT_DISCONN) + goto unlock; + l2cap_parse_conf_req(sk, req->data, cmd->len - sizeof(*req)); if (flags & 0x0001) { diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ddc4e9d5963..278c8676906 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -854,7 +854,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, rpn->flow_ctrl = flow_ctrl_settings; rpn->xon_char = xon_char; rpn->xoff_char = xoff_char; - rpn->param_mask = param_mask; + rpn->param_mask = cpu_to_le16(param_mask); *ptr = __fcs(buf); ptr++; @@ -1018,7 +1018,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr) if (len > 127) { hdr = (void *) skb_push(skb, 4); - put_unaligned(htobs(__len16(len)), (u16 *) &hdr->len); + put_unaligned(htobs(__len16(len)), (__le16 *) &hdr->len); } else { hdr = (void *) skb_push(skb, 3); hdr->len = __len8(len); @@ -1343,7 +1343,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit, * no parity, no flow control lines, normal XON/XOFF chars */ - if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) { bit_rate = rpn->bit_rate; if (bit_rate != RFCOMM_RPN_BR_115200) { BT_DBG("RPN bit rate mismatch 0x%x", bit_rate); @@ -1352,7 +1352,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_DATA) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_DATA)) { data_bits = __get_rpn_data_bits(rpn->line_settings); if (data_bits != RFCOMM_RPN_DATA_8) { BT_DBG("RPN data bits mismatch 0x%x", data_bits); @@ -1361,7 +1361,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_STOP) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_STOP)) { stop_bits = __get_rpn_stop_bits(rpn->line_settings); if (stop_bits != RFCOMM_RPN_STOP_1) { BT_DBG("RPN stop bits mismatch 0x%x", stop_bits); @@ -1370,7 +1370,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_PARITY)) { parity = __get_rpn_parity(rpn->line_settings); if (parity != RFCOMM_RPN_PARITY_NONE) { BT_DBG("RPN parity mismatch 0x%x", parity); @@ -1379,7 +1379,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_FLOW)) { flow_ctrl = rpn->flow_ctrl; if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) { BT_DBG("RPN flow ctrl mismatch 0x%x", flow_ctrl); @@ -1388,7 +1388,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_XON) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_XON)) { xon_char = rpn->xon_char; if (xon_char != RFCOMM_RPN_XON_CHAR) { BT_DBG("RPN XON char mismatch 0x%x", xon_char); @@ -1397,7 +1397,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ } } - if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) { + if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_XOFF)) { xoff_char = rpn->xoff_char; if (xoff_char != RFCOMM_RPN_XOFF_CHAR) { BT_DBG("RPN XOFF char mismatch 0x%x", xoff_char); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index b8e3a5f1c8a..e0e0d09023b 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -752,9 +752,9 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned return -ENOIOCTLCMD; } -static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) +static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { - struct termios *new = (struct termios *) tty->termios; + struct ktermios *new = tty->termios; int old_baud_rate = tty_termios_baud_rate(old); int new_baud_rate = tty_termios_baud_rate(new); @@ -765,7 +765,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) BT_DBG("tty %p termios %p", tty, old); - if (!dev) + if (!dev || !dev->dlc || !dev->dlc->session) return; /* Handle turning off CRTSCTS */ diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9f04864d15..8ca448db7a0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,7 +23,7 @@ #include <asm/atomic.h> #include "br_private.h" -static kmem_cache_t *br_fdb_cache __read_mostly; +static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f753c40c11d..55bb2634c08 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -77,12 +77,16 @@ static int port_cost(struct net_device *dev) * Called from work queue to allow for calling functions that * might sleep (such as speed check), and to debounce. */ -static void port_carrier_check(void *arg) +static void port_carrier_check(struct work_struct *work) { - struct net_device *dev = arg; struct net_bridge_port *p; + struct net_device *dev; struct net_bridge *br; + dev = container_of(work, struct net_bridge_port, + carrier_check.work)->dev; + work_release(work); + rtnl_lock(); p = dev->br_port; if (!p) @@ -276,7 +280,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, dev); + INIT_DELAYED_WORK_NAR(&p->carrier_check, port_carrier_check); br_stp_port_timer_init(p); kobject_init(&p->kobj); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 4e4119a1213..4c61a7e0a86 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -58,12 +58,13 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, { int num; void *buf; - size_t size = maxnum * sizeof(struct __fdb_entry); + size_t size; - if (size > PAGE_SIZE) { - size = PAGE_SIZE; + /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */ + if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry)) maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); - } + + size = maxnum * sizeof(struct __fdb_entry); buf = kmalloc(size, GFP_USER); if (!buf) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ac181be13d8..ea3337ad0ed 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -34,13 +34,13 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> +#include <linux/inetdevice.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/route.h> #include <asm/uaccess.h> -#include <asm/checksum.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -61,9 +61,6 @@ static int brnf_filter_vlan_tagged __read_mostly = 1; #define brnf_filter_vlan_tagged 1 #endif -int brnf_deferred_hooks; -EXPORT_SYMBOL_GPL(brnf_deferred_hooks); - static __be16 inline vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -222,10 +219,14 @@ static void __br_dnat_complain(void) * * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will - * later be passed up to the IP stack to be routed. + * later be passed up to the IP stack to be routed. For a redirected + * packet, ip_route_input() will give back the localhost as output device, + * which differs from the bridge device. * * Let us now consider the case that ip_route_input() fails: * + * This can be because the destination address is martian, in which case + * the packet will be dropped. * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() * will fail, while __ip_route_output_key() will return success. The source * address for __ip_route_output_key() is set to zero, so __ip_route_output_key @@ -238,7 +239,8 @@ static void __br_dnat_complain(void) * * --Lennert, 20020411 * --Bart, 20020416 (updated) - * --Bart, 20021007 (updated) */ + * --Bart, 20021007 (updated) + * --Bart, 20062711 (updated) */ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { if (skb->pkt_type == PACKET_OTHERHOST) { @@ -265,15 +267,15 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) struct net_device *dev = skb->dev; struct iphdr *iph = skb->nh.iph; struct nf_bridge_info *nf_bridge = skb->nf_bridge; + int err; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { - if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) { + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct rtable *rt; struct flowi fl = { .nl_u = { @@ -284,19 +286,33 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) }, .proto = 0, }; + struct in_device *in_dev = in_dev_get(dev); + + /* If err equals -EHOSTUNREACH the error is due to a + * martian destination or due to the fact that + * forwarding is disabled. For most martian packets, + * ip_route_output_key() will fail. It won't fail for 2 types of + * martian destinations: loopback destinations and destination + * 0.0.0.0. In both cases the packet will be dropped because the + * destination is the loopback device and not the bridge. */ + if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + goto free_skb; if (!ip_route_output_key(&rt, &fl)) { /* - Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. - * - Deal with redirected traffic. */ - if (((struct dst_entry *)rt)->dev == dev || - rt->rt_type == RTN_LOCAL) { + * require ip_forwarding. */ + if (((struct dst_entry *)rt)->dev == dev) { skb->dst = (struct dst_entry *)rt; goto bridged_dnat; } + /* we are sure that forwarding is disabled, so printing + * this message is no problem. Note that the packet could + * still have a martian destination address, in which case + * the packet could be dropped even if forwarding were enabled */ __br_dnat_complain(); dst_release((struct dst_entry *)rt); } +free_skb: kfree_skb(skb); return 0; } else { @@ -381,7 +397,7 @@ static int check_hbh_len(struct sk_buff *skb) case IPV6_TLV_JUMBO: if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2) goto bad; - pkt_len = ntohl(*(u32 *) (skb->nh.raw + off + 2)); + pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2)); if (pkt_len <= IPV6_MAXPLEN || skb->nh.ipv6h->payload_len) goto bad; @@ -666,110 +682,50 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT ***********************************************/ -static int br_nf_local_out_finish(struct sk_buff *skb) -{ - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish, NF_BR_PRI_FIRST + 1); - - return 0; -} - -/* This function sees both locally originated IP packets and forwarded +/* PF_BRIDGE/LOCAL_OUT *********************************************** + * + * This function sees both locally originated IP packets and forwarded * IP packets (in both cases the destination device is a bridge * device). It also sees bridged-and-DNAT'ed packets. - * To be able to filter on the physical bridge devices (with the physdev - * module), we steal packets destined to a bridge device away from the - * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, - * when we have determined the real output device. This is done in here. * * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. - * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched - * this packet before, and so the packet was locally originated. We fake - * the PF_INET/LOCAL_OUT hook. - * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, - * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure - * even routed packets that didn't arrive on a bridge interface have their - * nf_bridge->physindev set. */ + */ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct net_device *realindev, *realoutdev; + struct net_device *realindev; struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; - int pf; if (!skb->nf_bridge) return NF_ACCEPT; - if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) - pf = PF_INET; - else - pf = PF_INET6; - nf_bridge = skb->nf_bridge; - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; + if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) + return NF_ACCEPT; /* Bridged, take PF_BRIDGE/FORWARD. * (see big note in front of br_nf_pre_routing_finish) */ - if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, - skb->dev, br_forward_finish); - goto out; + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; } - realoutdev = bridge_parent(skb->dev); - if (!realoutdev) - return NF_DROP; - -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* iptables should match -o br0.x */ - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; - } - /* IP forwarded traffic has a physindev, locally - * generated traffic hasn't. */ - if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) { - struct net_device *parent = bridge_parent(realindev); - if (parent) - realindev = parent; - } - - NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); - } else { - NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; } -out: + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, + br_forward_finish); return NF_STOLEN; } @@ -875,69 +831,6 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, return NF_ACCEPT; } -/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT - * and PF_INET(6)/POST_ROUTING until we have done the forwarding - * decision in the bridge code and have determined nf_bridge->physoutdev. */ -static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *skb = *pskb; - - if ((out->hard_start_xmit == br_dev_xmit && - okfn != br_nf_forward_finish && - okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit) -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - || ((out->priv_flags & IFF_802_1Q_VLAN) && - VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) -#endif - ) { - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) { -#ifdef CONFIG_SYSCTL - /* This code is executed while in the IP(v6) stack, - the version should be 4 or 6. We can't use - skb->protocol because that isn't set on - PF_INET(6)/LOCAL_OUT. */ - struct iphdr *ip = skb->nh.iph; - - if (ip->version == 4 && !brnf_call_iptables) - return NF_ACCEPT; - else if (ip->version == 6 && !brnf_call_ip6tables) - return NF_ACCEPT; - else if (!brnf_deferred_hooks) - return NF_ACCEPT; -#endif - if (hook == NF_IP_POST_ROUTING) - return NF_ACCEPT; - if (!nf_bridge_alloc(skb)) - return NF_DROP; - } - - nf_bridge = skb->nf_bridge; - - /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we - * will need the indev then. For a brouter, the real indev - * can be a bridge port, so we make sure br_nf_local_out() - * doesn't use the bridge parent of the indev by using - * the BRNF_DONT_TAKE_PARENT mask. */ - if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { - nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; - nf_bridge->physindev = (struct net_device *)in; - } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* the iptables outdev is br0.x, not br0 */ - if (out->priv_flags & IFF_802_1Q_VLAN) - nf_bridge->netoutdev = (struct net_device *)out; -#endif - return NF_STOP; - } - - return NF_ACCEPT; -} - /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because @@ -983,36 +876,6 @@ static struct nf_hook_ops br_nf_ops[] = { .pf = PF_INET6, .hooknum = NF_IP6_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, - .priority = NF_IP6_PRI_FIRST, }, }; #ifdef CONFIG_SYSCTL diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8f661195d09..a9139682c49 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -15,6 +15,18 @@ #include <net/netlink.h> #include "br_private.h" +static inline size_t br_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(1); /* IFLA_PROTINFO */ +} + /* * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. @@ -24,51 +36,43 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por { const struct net_bridge *br = port->br; const struct net_device *dev = port->dev; - struct ifinfomsg *r; + struct ifinfomsg *hdr; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - u32 mtu = dev->mtu; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; - u8 portstate = port->state; pr_debug("br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_BRIDGE; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = 0; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -ENOBUFS; - RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_BRIDGE; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; - RTA_PUT(skb, IFLA_MASTER, sizeof(int), &br->dev->ifindex); + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + NLA_PUT_U32(skb, IFLA_MASTER, br->dev->ifindex); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_U8(skb, IFLA_OPERSTATE, operstate); if (dev->addr_len) - RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); if (dev->ifindex != dev->iflink) - RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); - - - RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); if (event == RTM_NEWLINK) - RTA_PUT(skb, IFLA_PROTINFO, sizeof(portstate), &portstate); - - nlh->nlmsg_len = skb->tail - b; - - return skb->len; + NLA_PUT_U8(skb, IFLA_PROTINFO, port->state); -nlmsg_failure: -rtattr_failure: + return nlmsg_end(skb, nlh); - skb_trim(skb, b - skb->data); - return -EINVAL; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } /* @@ -77,19 +81,16 @@ rtattr_failure: void br_ifinfo_notify(int event, struct net_bridge_port *port) { struct sk_buff *skb; - int payload = sizeof(struct ifinfomsg) + 128; int err = -ENOBUFS; pr_debug("bridge notify event=%d\n", event); - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = br_fill_ifinfo(skb, port, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in br_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: @@ -104,25 +105,18 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net_device *dev; int idx; - int s_idx = cb->args[0]; - int err = 0; read_lock(&dev_base_lock); for (dev = dev_base, idx = 0; dev; dev = dev->next) { - struct net_bridge_port *p = dev->br_port; - /* not a bridge port */ - if (!p) - continue; - - if (idx < s_idx) - goto cont; + if (dev->br_port == NULL || idx < cb->args[0]) + goto skip; - err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - if (err <= 0) + if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWLINK, + NLM_F_MULTI) < 0) break; -cont: +skip: ++idx; } read_unlock(&dev_base_lock); @@ -138,26 +132,27 @@ cont: */ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; - struct ifinfomsg *ifm = NLMSG_DATA(nlh); + struct ifinfomsg *ifm; + struct nlattr *protinfo; struct net_device *dev; struct net_bridge_port *p; u8 new_state; + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); if (ifm->ifi_family != AF_BRIDGE) return -EPFNOSUPPORT; - /* Must pass valid state as PROTINFO */ - if (rta[IFLA_PROTINFO-1]) { - u8 *pstate = RTA_DATA(rta[IFLA_PROTINFO-1]); - new_state = *pstate; - } else + protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); + if (!protinfo || nla_len(protinfo) < sizeof(u8)) return -EINVAL; + new_state = nla_get_u8(protinfo); if (new_state > BR_STATE_BLOCKING) return -EINVAL; - /* Find bridge port */ dev = __dev_get_by_index(ifm->ifi_index); if (!dev) return -ENODEV; @@ -170,10 +165,8 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (p->br->stp_enabled) return -EBUSY; - if (!netif_running(dev)) - return -ENETDOWN; - - if (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED) + if (!netif_running(dev) || + (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED)) return -ENETDOWN; p->state = new_state; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 74258d86f25..3a534e94c7f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -82,7 +82,7 @@ struct net_bridge_port struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; - struct work_struct carrier_check; + struct delayed_work carrier_check; struct rcu_head rcu; }; diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index d42f63f5e9f..9abbc09ccdc 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -17,7 +17,7 @@ static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device * { struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); - uint16_t type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; + __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index a614485828a..ce97c4285f9 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -15,7 +15,7 @@ #include <linux/module.h> static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, - const char *mac, uint32_t ip) + const char *mac, __be32 ip) { /* You may be puzzled as to how this code works. * Some tricks were used, refer to @@ -70,7 +70,7 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash return 0; } -static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) +static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { struct iphdr _iph, *ih; @@ -81,16 +81,16 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) *addr = ih->daddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _arph, *ah; - uint32_t buf, *bp; + __be32 buf, *bp; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || - ah->ar_pln != sizeof(uint32_t) || + ah->ar_pln != sizeof(__be32) || ah->ar_hln != ETH_ALEN) return -1; bp = skb_header_pointer(skb, sizeof(struct arphdr) + - 2 * ETH_ALEN + sizeof(uint32_t), - sizeof(uint32_t), &buf); + 2 * ETH_ALEN + sizeof(__be32), + sizeof(__be32), &buf); if (bp == NULL) return -1; *addr = *bp; @@ -98,7 +98,7 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) return 0; } -static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) +static int get_ip_src(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { struct iphdr _iph, *ih; @@ -109,15 +109,15 @@ static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) *addr = ih->saddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _arph, *ah; - uint32_t buf, *bp; + __be32 buf, *bp; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || - ah->ar_pln != sizeof(uint32_t) || + ah->ar_pln != sizeof(__be32) || ah->ar_hln != ETH_ALEN) return -1; bp = skb_header_pointer(skb, sizeof(struct arphdr) + - ETH_ALEN, sizeof(uint32_t), &buf); + ETH_ALEN, sizeof(__be32), &buf); if (bp == NULL) return -1; *addr = *bp; @@ -133,7 +133,7 @@ static int ebt_filter_among(const struct sk_buff *skb, struct ebt_among_info *info = (struct ebt_among_info *) data; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; - uint32_t dip = 0, sip = 0; + __be32 dip = 0, sip = 0; wh_dst = ebt_among_wh_dst(info); wh_src = ebt_among_wh_src(info); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index a6c81d9f73b..9c599800a90 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -35,10 +35,10 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in return EBT_NOMATCH; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { - uint32_t _addr, *ap; + __be32 _addr, *ap; /* IPv4 addresses are always 4 bytes */ - if (ah->ar_pln != sizeof(uint32_t)) + if (ah->ar_pln != sizeof(__be32)) return EBT_NOMATCH; if (info->bitmask & EBT_ARP_SRC_IP) { ap = skb_header_pointer(skb, sizeof(struct arphdr) + @@ -53,7 +53,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in if (info->bitmask & EBT_ARP_DST_IP) { ap = skb_header_pointer(skb, sizeof(struct arphdr) + - 2*ah->ar_hln+sizeof(uint32_t), + 2*ah->ar_hln+sizeof(__be32), sizeof(_addr), &_addr); if (ap == NULL) return EBT_NOMATCH; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 65b665ce57b..e4c642448e1 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -20,8 +20,8 @@ #include <linux/module.h> struct tcpudphdr { - uint16_t src; - uint16_t dst; + __be16 src; + __be16 dst; }; static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 466ed3440b7..a184f879f25 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -38,8 +38,8 @@ static int ebt_log_check(const char *tablename, unsigned int hookmask, struct tcpudphdr { - uint16_t src; - uint16_t dst; + __be16 src; + __be16 dst; }; struct arppayload @@ -130,7 +130,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, * then log the ARP payload */ if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && - ah->ar_pln == sizeof(uint32_t)) { + ah->ar_pln == sizeof(__be32)) { struct arppayload _arpp, *ap; ap = skb_header_pointer(skb, sizeof(_arph), diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index b54306a934e..62d23c7b25e 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -25,15 +25,15 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, int action = info->target & -16; if (action == MARK_SET_VALUE) - (*pskb)->nfmark = info->mark; + (*pskb)->mark = info->mark; else if (action == MARK_OR_VALUE) - (*pskb)->nfmark |= info->mark; + (*pskb)->mark |= info->mark; else if (action == MARK_AND_VALUE) - (*pskb)->nfmark &= info->mark; + (*pskb)->mark &= info->mark; else - (*pskb)->nfmark ^= info->mark; + (*pskb)->mark ^= info->mark; - return info->target | -16; + return info->target | ~EBT_VERDICT_BITS; } static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, @@ -44,13 +44,13 @@ static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) return -EINVAL; - tmp = info->target | -16; + tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; CLEAR_BASE_CHAIN_BIT; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) return -EINVAL; - tmp = info->target & -16; + tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index a6413e4b498..025869ee0b6 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -19,8 +19,8 @@ static int ebt_filter_mark(const struct sk_buff *skb, struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; if (info->bitmask & EBT_MARK_OR) - return !(!!(skb->nfmark & info->mask) ^ info->invert); - return !(((skb->nfmark & info->mask) == info->mark) ^ info->invert); + return !(!!(skb->mark & info->mask) ^ info->invert); + return !(((skb->mark & info->mask) == info->mark) ^ info->invert); } static int ebt_mark_check(const char *tablename, unsigned int hookmask, diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index cbb33e24ca8..a50722182bf 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -12,6 +12,8 @@ #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> +#include <linux/if_arp.h> +#include <net/arp.h> static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, @@ -31,24 +33,43 @@ static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, *pskb = nskb; } memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); - return info->target; + if (!(info->target & NAT_ARP_BIT) && + eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) { + struct arphdr _ah, *ap; + + ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah); + if (ap == NULL) + return EBT_DROP; + if (ap->ar_hln != ETH_ALEN) + goto out; + if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN)) + return EBT_DROP; + } +out: + return info->target | ~EBT_VERDICT_BITS; } static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *) data; + int tmp; if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) return -EINVAL; - if (BASE_CHAIN && info->target == EBT_RETURN) + tmp = info->target | ~EBT_VERDICT_BITS; + if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; CLEAR_BASE_CHAIN_BIT; if (strcmp(tablename, "nat")) return -EINVAL; if (hookmask & ~(1 << NF_BR_POST_ROUTING)) return -EINVAL; - if (INVALID_TARGET) + + if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + return -EINVAL; + tmp = info->target | EBT_VERDICT_BITS; + if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) return -EINVAL; return 0; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 9f950db3b76..c1af68b5a29 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -168,7 +168,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (ub->qlen == 1) skb_set_timestamp(ub->skb, &pm->stamp); pm->data_len = copy_len; - pm->mark = skb->nfmark; + pm->mark = skb->mark; pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index a2b452862b7..7ee37762296 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -55,7 +55,7 @@ ebt_filter_vlan(const struct sk_buff *skb, unsigned short id; /* VLAN ID, given from frame TCI */ unsigned char prio; /* user_priority, given from frame TCI */ /* VLAN encapsulated Type/Length field, given from orig frame */ - unsigned short encap; + __be16 encap; fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); if (fp == NULL) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 9a6e548e148..d37ce047893 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -23,7 +23,7 @@ static struct ebt_entries initial_chain = { .policy = EBT_ACCEPT, }; -static struct ebt_replace initial_table = +static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 3d5bd44f239..127135ead2d 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -30,7 +30,7 @@ static struct ebt_entries initial_chains[] = }, }; -static struct ebt_replace initial_table = +static struct ebt_replace_kernel initial_table = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 04dd42efda1..9c50488b62e 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -30,7 +30,7 @@ static struct ebt_entries initial_chains[] = } }; -static struct ebt_replace initial_table = +static struct ebt_replace_kernel initial_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3df55b2bd91..bee558a4180 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -86,7 +86,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m, static inline int ebt_dev_check(char *entry, const struct net_device *device) { int i = 0; - char *devname = device->name; + const char *devname = device->name; if (*entry == '\0') return 0; @@ -338,10 +338,11 @@ ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e, const char *name, unsigned int hookmask, unsigned int *cnt) { struct ebt_match *match; + size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; - if (((char *)m) + m->match_size + sizeof(struct ebt_entry_match) > - ((char *)e) + e->watchers_offset) + if (left < sizeof(struct ebt_entry_match) || + left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; match = find_match_lock(m->u.name, &ret, &ebt_mutex); if (!match) @@ -367,10 +368,11 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, const char *name, unsigned int hookmask, unsigned int *cnt) { struct ebt_watcher *watcher; + size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; - if (((char *)w) + w->watcher_size + sizeof(struct ebt_entry_watcher) > - ((char *)e) + e->target_offset) + if (left < sizeof(struct ebt_entry_watcher) || + left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); if (!watcher) @@ -391,35 +393,91 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, return 0; } +static int ebt_verify_pointers(struct ebt_replace *repl, + struct ebt_table_info *newinfo) +{ + unsigned int limit = repl->entries_size; + unsigned int valid_hooks = repl->valid_hooks; + unsigned int offset = 0; + int i; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) + newinfo->hook_entry[i] = NULL; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; + + while (offset < limit) { + size_t left = limit - offset; + struct ebt_entry *e = (void *)newinfo->entries + offset; + + if (left < sizeof(unsigned int)) + break; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if ((char __user *)repl->hook_entry[i] == + repl->entries + offset) + break; + } + + if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { + if (e->bitmask != 0) { + /* we make userspace set this right, + so there is no misunderstanding */ + BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " + "in distinguisher\n"); + return -EINVAL; + } + if (i != NF_BR_NUMHOOKS) + newinfo->hook_entry[i] = (struct ebt_entries *)e; + if (left < sizeof(struct ebt_entries)) + break; + offset += sizeof(struct ebt_entries); + } else { + if (left < sizeof(struct ebt_entry)) + break; + if (left < e->next_offset) + break; + offset += e->next_offset; + } + } + if (offset != limit) { + BUGPRINT("entries_size too small\n"); + return -EINVAL; + } + + /* check if all valid hooks have a chain */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (!newinfo->hook_entry[i] && + (valid_hooks & (1 << i))) { + BUGPRINT("Valid hook without chain\n"); + return -EINVAL; + } + } + return 0; +} + /* * this one is very careful, as it is the first function * to parse the userspace data */ static inline int ebt_check_entry_size_and_hooks(struct ebt_entry *e, - struct ebt_table_info *newinfo, char *base, char *limit, - struct ebt_entries **hook_entries, unsigned int *n, unsigned int *cnt, - unsigned int *totalcnt, unsigned int *udc_cnt, unsigned int valid_hooks) + struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) { int i; for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) - continue; - if ( (char *)hook_entries[i] - base == - (char *)e - newinfo->entries) + if ((void *)e == (void *)newinfo->hook_entry[i]) break; } /* beginning of a new chain if i == NF_BR_NUMHOOKS it must be a user defined chain */ - if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) != 0) { - /* we make userspace set this right, - so there is no misunderstanding */ - BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " - "in distinguisher\n"); - return -EINVAL; - } + if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries as it said it has */ if (*n != *cnt) { @@ -427,12 +485,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e, "in the chain\n"); return -EINVAL; } - /* before we look at the struct, be sure it is not too big */ - if ((char *)hook_entries[i] + sizeof(struct ebt_entries) - > limit) { - BUGPRINT("entries_size too small\n"); - return -EINVAL; - } if (((struct ebt_entries *)e)->policy != EBT_DROP && ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { /* only RETURN from udc */ @@ -444,8 +496,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e, } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; - else - newinfo->hook_entry[i] = (struct ebt_entries *)e; if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { BUGPRINT("counter_offset != totalcnt"); return -EINVAL; @@ -466,7 +516,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e, BUGPRINT("target size too small\n"); return -EINVAL; } - (*cnt)++; (*totalcnt)++; return 0; @@ -485,17 +534,14 @@ struct ebt_cl_stack */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, - struct ebt_entries **hook_entries, unsigned int *n, unsigned int valid_hooks, - struct ebt_cl_stack *udc) + unsigned int *n, struct ebt_cl_stack *udc) { int i; /* we're only interested in chain starts */ - if (e->bitmask & EBT_ENTRY_OR_ENTRIES) + if (e->bitmask) return 0; for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) - continue; if (newinfo->hook_entry[i] == (struct ebt_entries *)e) break; } @@ -541,7 +587,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) { struct ebt_entry_target *t; - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; /* we're done */ if (cnt && (*cnt)-- == 0) @@ -558,16 +604,17 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) static inline int ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, - const char *name, unsigned int *cnt, unsigned int valid_hooks, + const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; struct ebt_target *target; unsigned int i, j, hook = 0, hookmask = 0; + size_t gap = e->next_offset - e->target_offset; int ret; /* don't mess with the struct ebt_entries */ - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; if (e->bitmask & ~EBT_F_MASK) { @@ -584,7 +631,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) + if (!newinfo->hook_entry[i]) continue; if ((char *)newinfo->hook_entry[i] < (char *)e) hook = i; @@ -625,8 +672,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, t->u.target = target; if (t->u.target == &ebt_standard_target) { - if (e->target_offset + sizeof(struct ebt_standard_target) > - e->next_offset) { + if (gap < sizeof(struct ebt_standard_target)) { BUGPRINT("Standard target size too big\n"); ret = -EFAULT; goto cleanup_watchers; @@ -637,8 +683,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, ret = -EFAULT; goto cleanup_watchers; } - } else if ((e->target_offset + t->target_size + - sizeof(struct ebt_entry_target) > e->next_offset) || + } else if (t->target_size > gap - sizeof(struct ebt_entry_target) || (t->u.target->check && t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ module_put(t->u.target->me); @@ -708,7 +753,9 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s BUGPRINT("loop\n"); return -1; } - /* this can't be 0, so the above test is correct */ + if (cl_s[i].hookmask & (1 << hooknr)) + goto letscontinue; + /* this can't be 0, so the loop test is correct */ cl_s[i].cs.n = pos + 1; pos = 0; cl_s[i].cs.e = ((void *)e + e->next_offset); @@ -728,42 +775,35 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(struct ebt_replace *repl, - struct ebt_table_info *newinfo) +static int translate_table(char *name, struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */ i = 0; - while (i < NF_BR_NUMHOOKS && !(repl->valid_hooks & (1 << i))) + while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; if (i == NF_BR_NUMHOOKS) { BUGPRINT("No valid hooks specified\n"); return -EINVAL; } - if (repl->hook_entry[i] != (struct ebt_entries *)repl->entries) { + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) { BUGPRINT("Chains don't start at beginning\n"); return -EINVAL; } /* make sure chains are ordered after each other in same order as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { - if (!(repl->valid_hooks & (1 << j))) + if (!newinfo->hook_entry[j]) continue; - if ( repl->hook_entry[j] <= repl->hook_entry[i] ) { + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) { BUGPRINT("Hook order must be followed\n"); return -EINVAL; } i = j; } - for (i = 0; i < NF_BR_NUMHOOKS; i++) - newinfo->hook_entry[i] = NULL; - - newinfo->entries_size = repl->entries_size; - newinfo->nentries = repl->nentries; - /* do some early checkings and initialize some things */ i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ @@ -771,9 +811,8 @@ static int translate_table(struct ebt_replace *repl, newinfo->nentries afterwards */ udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry_size_and_hooks, newinfo, repl->entries, - repl->entries + repl->entries_size, repl->hook_entry, &i, &j, &k, - &udc_cnt, repl->valid_hooks); + ebt_check_entry_size_and_hooks, newinfo, + &i, &j, &k, &udc_cnt); if (ret != 0) return ret; @@ -788,15 +827,6 @@ static int translate_table(struct ebt_replace *repl, return -EINVAL; } - /* check if all valid hooks have a chain */ - for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if (newinfo->hook_entry[i] == NULL && - (repl->valid_hooks & (1 << i))) { - BUGPRINT("Valid hook without chain\n"); - return -EINVAL; - } - } - /* get the location of the udc, put them in an array while we're at it, allocate the chainstack */ if (udc_cnt) { @@ -824,8 +854,7 @@ static int translate_table(struct ebt_replace *repl, return -ENOMEM; i = 0; /* the i'th udc */ EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_get_udc_positions, newinfo, repl->hook_entry, &i, - repl->valid_hooks, cl_s); + ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { BUGPRINT("i != udc_cnt\n"); @@ -836,7 +865,7 @@ static int translate_table(struct ebt_replace *repl, /* Check for loops */ for (i = 0; i < NF_BR_NUMHOOKS; i++) - if (repl->valid_hooks & (1 << i)) + if (newinfo->hook_entry[i]) if (check_chainloops(newinfo->hook_entry[i], cl_s, udc_cnt, i, newinfo->entries)) { vfree(cl_s); @@ -856,8 +885,7 @@ static int translate_table(struct ebt_replace *repl, /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, repl->name, &i, repl->valid_hooks, - cl_s, udc_cnt); + ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, &i); @@ -954,7 +982,11 @@ static int do_replace(void __user *user, unsigned int len) /* this can get initialized by translate_table() */ newinfo->chainstack = NULL; - ret = translate_table(&tmp, newinfo); + ret = ebt_verify_pointers(&tmp, newinfo); + if (ret != 0) + goto free_counterstmp; + + ret = translate_table(tmp.name, newinfo); if (ret != 0) goto free_counterstmp; @@ -1125,35 +1157,47 @@ int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; struct ebt_table *t; + struct ebt_replace_kernel *repl; int ret, i, countersize; + void *p; - if (!table || !table->table ||!table->table->entries || - table->table->entries_size == 0 || - table->table->counters || table->private) { + if (!table || !(repl = table->table) || !repl->entries || + repl->entries_size == 0 || + repl->counters || table->private) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * + countersize = COUNTER_OFFSET(repl->nentries) * (highest_possible_processor_id()+1); newinfo = vmalloc(sizeof(*newinfo) + countersize); ret = -ENOMEM; if (!newinfo) return -ENOMEM; - newinfo->entries = vmalloc(table->table->entries_size); - if (!(newinfo->entries)) + p = vmalloc(repl->entries_size); + if (!p) goto free_newinfo; - memcpy(newinfo->entries, table->table->entries, - table->table->entries_size); + memcpy(p, repl->entries, repl->entries_size); + newinfo->entries = p; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; if (countersize) memset(newinfo->counters, 0, countersize); /* fill in newinfo and parse the entries */ newinfo->chainstack = NULL; - ret = translate_table(table->table, newinfo); + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((repl->valid_hooks & (1 << i)) == 0) + newinfo->hook_entry[i] = NULL; + else + newinfo->hook_entry[i] = p + + ((char *)repl->hook_entry[i] - repl->entries); + } + ret = translate_table(repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; @@ -1277,33 +1321,33 @@ free_tmp: } static inline int ebt_make_matchname(struct ebt_entry_match *m, - char *base, char *ubase) + char *base, char __user *ubase) { - char *hlp = ubase - base + (char *)m; + char __user *hlp = ubase + ((char *)m - base); if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } static inline int ebt_make_watchername(struct ebt_entry_watcher *w, - char *base, char *ubase) + char *base, char __user *ubase) { - char *hlp = ubase - base + (char *)w; + char __user *hlp = ubase + ((char *)w - base); if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -static inline int ebt_make_names(struct ebt_entry *e, char *base, char *ubase) +static inline int ebt_make_names(struct ebt_entry *e, char *base, char __user *ubase) { int ret; - char *hlp; + char __user *hlp; struct ebt_entry_target *t; - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; - hlp = ubase - base + (char *)e + e->target_offset; + hlp = ubase + (((char *)e + e->target_offset) - base); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); diff --git a/net/core/Makefile b/net/core/Makefile index 119568077da..73272d506e9 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o -obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f558c61aecc..797fdd4352c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -321,7 +321,7 @@ fault: static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 __user *to, int len, - unsigned int *csump) + __wsum *csump) { int start = skb_headlen(skb); int pos = 0; @@ -350,7 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -386,7 +386,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2 = 0; + __wsum csum2 = 0; if (copy > len) copy = len; if (skb_copy_and_csum_datagram(list, @@ -411,11 +411,11 @@ fault: return -EFAULT; } -unsigned int __skb_checksum_complete(struct sk_buff *skb) +__sum16 __skb_checksum_complete(struct sk_buff *skb) { - unsigned int sum; + __sum16 sum; - sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); @@ -441,7 +441,7 @@ EXPORT_SYMBOL(__skb_checksum_complete); int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov) { - unsigned int csum; + __wsum csum; int chunk = skb->len - hlen; /* Skip filled elements. @@ -460,7 +460,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum)) goto fault; - if ((unsigned short)csum_fold(csum)) + if (csum_fold(csum)) goto csum_error; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); diff --git a/net/core/dev.c b/net/core/dev.c index 81c426adcd1..e660cb57e42 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,7 +98,6 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> -#include <linux/divert.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -1170,7 +1169,7 @@ EXPORT_SYMBOL(netif_device_attach); */ int skb_checksum_help(struct sk_buff *skb) { - unsigned int csum; + __wsum csum; int ret = 0, offset = skb->h.raw - skb->data; if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -1192,9 +1191,9 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb->tail - skb->h.raw; BUG_ON(offset <= 0); - BUG_ON(skb->csum + 2 > offset); + BUG_ON(skb->csum_offset + 2 > offset); - *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; @@ -1216,7 +1215,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; - int type = skb->protocol; + __be16 type = skb->protocol; int err; BUG_ON(skb_shinfo(skb)->frag_list); @@ -1767,7 +1766,7 @@ int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; int ret = NET_RX_DROP; - unsigned short type; + __be16 type; /* if we've gotten here through NAPI, check netpoll */ if (skb->dev->poll && netpoll_rx(skb)) @@ -1827,8 +1826,6 @@ int netif_receive_skb(struct sk_buff *skb) ncls: #endif - handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; @@ -2898,10 +2895,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->ingress_lock); #endif - ret = alloc_divert_blk(dev); - if (ret) - goto out; - dev->iflink = -1; /* Init, if this function is available */ @@ -2910,13 +2903,13 @@ int register_netdevice(struct net_device *dev) if (ret) { if (ret > 0) ret = -EIO; - goto out_err; + goto out; } } if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out_err; + goto out; } dev->ifindex = dev_new_index(); @@ -2930,7 +2923,7 @@ int register_netdevice(struct net_device *dev) = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out_err; + goto out; } } @@ -2974,7 +2967,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_sysfs(dev); if (ret) - goto out_err; + goto out; dev->reg_state = NETREG_REGISTERED; /* @@ -3001,9 +2994,6 @@ int register_netdevice(struct net_device *dev) out: return ret; -out_err: - free_divert_blk(dev); - goto out; } /** @@ -3035,15 +3025,6 @@ int register_netdev(struct net_device *dev) goto out; } - /* - * Back compatibility hook. Kill this one in 2.5 - */ - if (dev->name[0] == 0 || dev->name[0] == ' ') { - err = dev_alloc_name(dev, "eth%d"); - if (err < 0) - goto out; - } - err = register_netdevice(dev); out: rtnl_unlock(); @@ -3329,8 +3310,6 @@ int unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - free_divert_blk(dev); - /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3361,7 +3340,6 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); -#ifdef CONFIG_HOTPLUG_CPU static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3405,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_NET_DMA /** diff --git a/net/core/dst.c b/net/core/dst.c index 1a5e49da0e7..836ec660692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -125,7 +125,7 @@ void * dst_alloc(struct dst_ops * ops) if (ops->gc()) return NULL; } - dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; memset(dst, 0, ops->entry_size); diff --git a/net/core/dv.c b/net/core/dv.c deleted file mode 100644 index 29ee77f1593..00000000000 --- a/net/core/dv.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Generic frame diversion - * - * Authors: - * Benoit LOCHER: initial integration within the kernel with support for ethernet - * Dave Miller: improvement on the code (correctness, performance and source files) - * - */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <net/dst.h> -#include <net/arp.h> -#include <net/sock.h> -#include <net/ipv6.h> -#include <net/ip.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/checksum.h> -#include <linux/divert.h> -#include <linux/sockios.h> - -const char sysctl_divert_version[32]="0.46"; /* Current version */ - -static int __init dv_init(void) -{ - return 0; -} -module_init(dv_init); - -/* - * Allocate a divert_blk for a device. This must be an ethernet nic. - */ -int alloc_divert_blk(struct net_device *dev) -{ - int alloc_size = (sizeof(struct divert_blk) + 3) & ~3; - - dev->divert = NULL; - if (dev->type == ARPHRD_ETHER) { - dev->divert = kzalloc(alloc_size, GFP_KERNEL); - if (dev->divert == NULL) { - printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n", - dev->name); - return -ENOMEM; - } - dev_hold(dev); - } - - return 0; -} - -/* - * Free a divert_blk allocated by the above function, if it was - * allocated on that device. - */ -void free_divert_blk(struct net_device *dev) -{ - if (dev->divert) { - kfree(dev->divert); - dev->divert=NULL; - dev_put(dev); - } -} - -/* - * Adds a tcp/udp (source or dest) port to an array - */ -static int add_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave :) - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) - return -EALREADY; - } - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == 0) { - ports[i] = port; - return 0; - } - } - - return -ENOBUFS; -} - -/* - * Removes a port from an array tcp/udp (source or dest) - */ -static int remove_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave ! - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) { - ports[i] = 0; - return 0; - } - } - - return -EINVAL; -} - -/* Some basic sanity checks on the arguments passed to divert_ioctl() */ -static int check_args(struct divert_cf *div_cf, struct net_device **dev) -{ - char devname[32]; - int ret; - - if (dev == NULL) - return -EFAULT; - - /* GETVERSION: all other args are unused */ - if (div_cf->cmd == DIVCMD_GETVERSION) - return 0; - - /* Network device index should reasonably be between 0 and 1000 :) */ - if (div_cf->dev_index < 0 || div_cf->dev_index > 1000) - return -EINVAL; - - /* Let's try to find the ifname */ - sprintf(devname, "eth%d", div_cf->dev_index); - *dev = dev_get_by_name(devname); - - /* dev should NOT be null */ - if (*dev == NULL) - return -EINVAL; - - ret = 0; - - /* user issuing the ioctl must be a super one :) */ - if (!capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto out; - } - - /* Device must have a divert_blk member NOT null */ - if ((*dev)->divert == NULL) - ret = -EINVAL; -out: - dev_put(*dev); - return ret; -} - -/* - * control function of the diverter - */ -#if 0 -#define DVDBG(a) \ - printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) -#else -#define DVDBG(a) -#endif - -int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg) -{ - struct divert_cf div_cf; - struct divert_blk *div_blk; - struct net_device *dev; - int ret; - - switch (cmd) { - case SIOCGIFDIVERT: - DVDBG("SIOCGIFDIVERT, copy_from_user"); - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - DVDBG("before check_args"); - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - DVDBG("after checkargs"); - div_blk = dev->divert; - - DVDBG("befre switch()"); - switch (div_cf.cmd) { - case DIVCMD_GETSTATUS: - /* Now, just give the user the raw divert block - * for him to play with :) - */ - if (copy_to_user(div_cf.arg1.ptr, dev->divert, - sizeof(struct divert_blk))) - return -EFAULT; - break; - - case DIVCMD_GETVERSION: - DVDBG("GETVERSION: checking ptr"); - if (div_cf.arg1.ptr == NULL) - return -EINVAL; - DVDBG("GETVERSION: copying data to userland"); - if (copy_to_user(div_cf.arg1.ptr, - sysctl_divert_version, 32)) - return -EFAULT; - DVDBG("GETVERSION: data copied"); - break; - - default: - return -EINVAL; - } - - break; - - case SIOCSIFDIVERT: - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - - div_blk = dev->divert; - - switch(div_cf.cmd) { - case DIVCMD_RESET: - div_blk->divert = 0; - div_blk->protos = DIVERT_PROTO_NONE; - memset(div_blk->tcp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->tcp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - return 0; - - case DIVCMD_DIVERT: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->divert) - return -EALREADY; - div_blk->divert = 1; - break; - - case DIVARG1_DISABLE: - if (!div_blk->divert) - return -EALREADY; - div_blk->divert = 0; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_IP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_IP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_IP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_IP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_IP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_TCP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_TCP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_TCP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_TCP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_UDP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_UDP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_UDP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_UDP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_ICMP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_ICMP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_ICMP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_ICMP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_ICMP; - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - return 0; -} - - -/* - * Check if packet should have its dest mac address set to the box itself - * for diversion - */ - -#define ETH_DIVERT_FRAME(skb) \ - memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \ - skb->pkt_type=PACKET_HOST - -void divert_frame(struct sk_buff *skb) -{ - struct ethhdr *eth = eth_hdr(skb); - struct iphdr *iph; - struct tcphdr *tcph; - struct udphdr *udph; - struct divert_blk *divert = skb->dev->divert; - int i, src, dst; - unsigned char *skb_data_end = skb->data + skb->len; - - /* Packet is already aimed at us, return */ - if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr)) - return; - - /* proto is not IP, do nothing */ - if (eth->h_proto != htons(ETH_P_IP)) - return; - - /* Divert all IP frames ? */ - if (divert->protos & DIVERT_PROTO_IP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP frame (thanks Dave) */ - iph = (struct iphdr *) skb->data; - if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed IP packet !\n"); - return; - } - - switch (iph->protocol) { - /* Divert all ICMP frames ? */ - case IPPROTO_ICMP: - if (divert->protos & DIVERT_PROTO_ICMP) { - ETH_DIVERT_FRAME(skb); - return; - } - break; - - /* Divert all TCP frames ? */ - case IPPROTO_TCP: - if (divert->protos & DIVERT_PROTO_TCP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * frame (thanx Dave) - */ - tcph = (struct tcphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(tcph+1)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed TCP packet !\n"); - return; - } - - /* Divert some tcp dst/src ports only ?*/ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->tcp_dst[i]; - src = divert->tcp_src[i]; - if ((dst && dst == tcph->dest) || - (src && src == tcph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - - /* Divert all UDP frames ? */ - case IPPROTO_UDP: - if (divert->protos & DIVERT_PROTO_UDP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * packet (thanks Dave) - */ - udph = (struct udphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(udph+1)) >= skb_data_end) { - printk(KERN_INFO - "divert: malformed UDP packet !\n"); - return; - } - - /* Divert some udp dst/src ports only ? */ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->udp_dst[i]; - src = divert->udp_src[i]; - if ((dst && dst == udph->dest) || - (src && src == udph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - } -} diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6b0e63cacd9..1df6cd4568d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -107,6 +107,22 @@ out: EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, + struct flowi *fl, int flags) +{ + int ret = 0; + + if (rule->ifindex && (rule->ifindex != fl->iif)) + goto out; + + if ((rule->mark ^ fl->mark) & rule->mark_mask) + goto out; + + ret = ops->match(rule, fl, flags); +out: + return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; +} + int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { @@ -116,10 +132,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); list_for_each_entry_rcu(rule, ops->rules_list, list) { - if (rule->ifindex && (rule->ifindex != fl->iif)) - continue; - - if (!ops->match(rule, fl, flags)) + if (!fib_rule_match(rule, ops, fl, flags)) continue; err = ops->action(rule, fl, flags, arg); @@ -179,6 +192,18 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = dev->ifindex; } + if (tb[FRA_FWMARK]) { + rule->mark = nla_get_u32(tb[FRA_FWMARK]); + if (rule->mark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule->mark_mask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); + rule->action = frh->action; rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); @@ -250,6 +275,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) nla_strcmp(tb[FRA_IFNAME], rule->ifname)) continue; + if (tb[FRA_FWMARK] && + (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) + continue; + + if (tb[FRA_FWMASK] && + (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -273,6 +306,22 @@ errout: return err; } +static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + struct fib_rule *rule) +{ + size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(4) /* FRA_PRIORITY */ + + nla_total_size(4) /* FRA_TABLE */ + + nla_total_size(4) /* FRA_FWMARK */ + + nla_total_size(4); /* FRA_FWMASK */ + + if (ops->nlmsg_payload) + payload += ops->nlmsg_payload(rule); + + return payload; +} + static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, u32 pid, u32 seq, int type, int flags, struct fib_rules_ops *ops) @@ -298,6 +347,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->pref) NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + if (rule->mark) + NLA_PUT_U32(skb, FRA_FWMARK, rule->mark); + + if (rule->mark_mask || rule->mark) + NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask); + if (ops->fill(rule, skb, nlh, frh) < 0) goto nla_put_failure; @@ -345,15 +400,13 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in fib_rule_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: diff --git a/net/core/filter.c b/net/core/filter.c index 6732782a5a4..0df843b667f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -178,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { - A = ntohl(get_unaligned((u32 *)ptr)); + A = ntohl(get_unaligned((__be32 *)ptr)); continue; } break; @@ -187,7 +187,7 @@ load_w: load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { - A = ntohs(get_unaligned((u16 *)ptr)); + A = ntohs(get_unaligned((__be16 *)ptr)); continue; } break; @@ -261,7 +261,7 @@ load_b: */ switch (k-SKF_AD_OFF) { case SKF_AD_PROTOCOL: - A = htons(skb->protocol); + A = ntohs(skb->protocol); continue; case SKF_AD_PKTTYPE: A = skb->pkt_type; diff --git a/net/core/flow.c b/net/core/flow.c index b16d31ae5e5..d137f971f97 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep __read_mostly; +static struct kmem_cache *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; @@ -211,7 +211,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, if (flow_count(cpu) > flow_hwm) flow_cache_shrink(cpu); - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->next = *head; *head = fle; @@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu) tasklet_init(tasklet, flow_cache_flush_tasklet, 0); } -#ifdef CONFIG_HOTPLUG_CPU static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb, __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ static int __init flow_cache_init(void) { diff --git a/net/core/iovec.c b/net/core/iovec.c index 65e4b56fbc7..04b249c40b5 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -158,9 +158,9 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, * call to this function will be unaligned also. */ int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, int *csump) + int offset, unsigned int len, __wsum *csump) { - int csum = *csump; + __wsum csum = *csump; int partial_cnt = 0, err = 0; /* Skip over the finished iovecs */ diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h new file mode 100644 index 00000000000..283c2b993fb --- /dev/null +++ b/net/core/kmap_skb.h @@ -0,0 +1,19 @@ +#include <linux/highmem.h> + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#ifdef CONFIG_HIGHMEM + BUG_ON(in_irq()); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +} + +static inline void kunmap_skb_frag(void *vaddr) +{ + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +} diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 4b36114744c..549a2ce951b 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -34,8 +34,8 @@ enum lw_bits { static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; -static void linkwatch_event(void *dummy); -static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL); +static void linkwatch_event(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); @@ -127,7 +127,7 @@ void linkwatch_run_queue(void) } -static void linkwatch_event(void *dummy) +static void linkwatch_event(struct work_struct *dummy) { /* Limit the number of linkwatch events to one * per second so that a runaway driver does not @@ -171,10 +171,9 @@ void linkwatch_fire_event(struct net_device *dev) unsigned long delay = linkwatch_nextevent - jiffies; /* If we wrap around we'll delay it by at most HZ. */ - if (!delay || delay > HZ) - schedule_work(&linkwatch_work); - else - schedule_delayed_work(&linkwatch_work, delay); + if (delay > HZ) + delay = 0; + schedule_delayed_work(&linkwatch_work, delay); } } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b4b478353b2..e7300b6b407 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC); if (!n) goto out_entries; @@ -577,9 +577,10 @@ void neigh_destroy(struct neighbour *neigh) while ((hh = neigh->hh) != NULL) { neigh->hh = hh->hh_next; hh->hh_next = NULL; - write_lock_bh(&hh->hh_lock); + + write_seqlock_bh(&hh->hh_lock); hh->hh_output = neigh_blackhole; - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); if (atomic_dec_and_test(&hh->hh_refcnt)) kfree(hh); } @@ -897,9 +898,9 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { - write_lock_bh(&hh->hh_lock); + write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); } } } @@ -1089,7 +1090,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, break; if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { - rwlock_init(&hh->hh_lock); + seqlock_init(&hh->hh_lock); hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; @@ -1266,10 +1267,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { - memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; atomic_set(&p->refcnt, 1); INIT_RCU_HEAD(&p->rcu_head); @@ -2410,20 +2410,27 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_ARPD +static inline size_t neigh_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(4); /* NDA_PROBES */ +} + static void __neigh_notify(struct neighbour *n, int type, int flags) { struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = neigh_fill_info(skb, n, 0, 0, type, flags); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in neigh_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: @@ -2618,14 +2625,14 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, proc_handler *handler, ctl_handler *strategy) { - struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, + sizeof(*t), GFP_KERNEL); const char *dev_name_source = NULL; char *dev_name = NULL; int err = 0; if (!t) return -ENOBUFS; - memcpy(t, &neigh_sysctl_template, sizeof(*t)); t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9308af060b4..823215d8e90 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -34,18 +34,12 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) -#define MAX_RETRIES 20000 -static DEFINE_SPINLOCK(skb_list_lock); -static int nr_skbs; -static struct sk_buff *skbs; - -static DEFINE_SPINLOCK(queue_lock); -static int queue_depth; -static struct sk_buff *queue_head, *queue_tail; +static struct sk_buff_head skb_pool; static atomic_t trapped; +#define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -56,54 +50,41 @@ static atomic_t trapped; static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); -static void queue_process(void *p) +static void queue_process(struct work_struct *work) { - unsigned long flags; + struct netpoll_info *npinfo = + container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; - - while (queue_head) { - spin_lock_irqsave(&queue_lock, flags); - - skb = queue_head; - queue_head = skb->next; - if (skb == queue_tail) - queue_head = NULL; - - queue_depth--; - - spin_unlock_irqrestore(&queue_lock, flags); - - dev_queue_xmit(skb); - } -} - -static DECLARE_WORK(send_queue, queue_process, NULL); - -void netpoll_queue(struct sk_buff *skb) -{ unsigned long flags; - if (queue_depth == MAX_QUEUE_DEPTH) { - __kfree_skb(skb); - return; - } + while ((skb = skb_dequeue(&npinfo->txq))) { + struct net_device *dev = skb->dev; - spin_lock_irqsave(&queue_lock, flags); - if (!queue_head) - queue_head = skb; - else - queue_tail->next = skb; - queue_tail = skb; - queue_depth++; - spin_unlock_irqrestore(&queue_lock, flags); + if (!netif_device_present(dev) || !netif_running(dev)) { + __kfree_skb(skb); + continue; + } - schedule_work(&send_queue); + local_irq_save(flags); + netif_tx_lock(dev); + if (netif_queue_stopped(dev) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + skb_queue_head(&npinfo->txq, skb); + netif_tx_unlock(dev); + local_irq_restore(flags); + + schedule_delayed_work(&npinfo->tx_work, HZ/10); + return; + } + netif_tx_unlock(dev); + local_irq_restore(flags); + } } -static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) +static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, + unsigned short ulen, __be32 saddr, __be32 daddr) { - unsigned int psum; + __wsum psum; if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) return 0; @@ -111,7 +92,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); if (skb->ip_summed == CHECKSUM_COMPLETE && - !(u16)csum_fold(csum_add(psum, skb->csum))) + !csum_fold(csum_add(psum, skb->csum))) return 0; skb->csum = psum; @@ -167,12 +148,11 @@ static void service_arp_queue(struct netpoll_info *npi) arp_reply(skb); skb = skb_dequeue(&npi->arp_tx); } - return; } void netpoll_poll(struct netpoll *np) { - if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) + if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) return; /* Process pending work on NIC */ @@ -190,17 +170,15 @@ static void refill_skbs(void) struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_list_lock, flags); - while (nr_skbs < MAX_SKBS) { + spin_lock_irqsave(&skb_pool.lock, flags); + while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - skb->next = skbs; - skbs = skb; - nr_skbs++; + __skb_queue_tail(&skb_pool, skb); } - spin_unlock_irqrestore(&skb_list_lock, flags); + spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) @@ -219,7 +197,7 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if(skb->destructor) + if (skb->destructor) dev_kfree_skb_any(skb); /* put this one back */ else __kfree_skb(skb); @@ -229,38 +207,25 @@ static void zap_completion_queue(void) put_cpu_var(softnet_data); } -static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) +static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { - int once = 1, count = 0; - unsigned long flags; - struct sk_buff *skb = NULL; + int count = 0; + struct sk_buff *skb; zap_completion_queue(); + refill_skbs(); repeat: - if (nr_skbs < MAX_SKBS) - refill_skbs(); skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + skb = skb_dequeue(&skb_pool); if (!skb) { - spin_lock_irqsave(&skb_list_lock, flags); - skb = skbs; - if (skb) { - skbs = skb->next; - skb->next = NULL; - nr_skbs--; - } - spin_unlock_irqrestore(&skb_list_lock, flags); - } - - if(!skb) { - count++; - if (once && (count == 1000000)) { - printk("out of netpoll skbs!\n"); - once = 0; + if (++count < 10) { + netpoll_poll(np); + goto repeat; } - netpoll_poll(np); - goto repeat; + return NULL; } atomic_set(&skb->users, 1); @@ -270,50 +235,46 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { - int status; - struct netpoll_info *npinfo; + int status = NETDEV_TX_BUSY; + unsigned long tries; + struct net_device *dev = np->dev; + struct netpoll_info *npinfo = np->dev->npinfo; - if (!np || !np->dev || !netif_running(np->dev)) { - __kfree_skb(skb); - return; - } + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + __kfree_skb(skb); + return; + } - npinfo = np->dev->npinfo; + /* don't get messages out of order, and no recursion */ + if (skb_queue_len(&npinfo->txq) == 0 && + npinfo->poll_owner != smp_processor_id()) { + unsigned long flags; - /* avoid recursion */ - if (npinfo->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { - if (np->drop) - np->drop(skb); - else - __kfree_skb(skb); - return; - } - - do { - npinfo->tries--; - netif_tx_lock(np->dev); + local_irq_save(flags); + if (netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - status = NETDEV_TX_BUSY; - if (!netif_queue_stopped(np->dev)) - status = np->dev->hard_start_xmit(skb, np->dev); + if (status == NETDEV_TX_OK) + break; - netif_tx_unlock(np->dev); + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - /* success */ - if(!status) { - npinfo->tries = MAX_RETRIES; /* reset */ - return; + udelay(USEC_PER_POLL); + } + netif_tx_unlock(dev); } + local_irq_restore(flags); + } - /* transmit busy */ - netpoll_poll(np); - udelay(50); - } while (npinfo->tries > 0); + if (status != NETDEV_TX_OK) { + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -340,6 +301,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); udph->check = 0; + udph->check = csum_tcpudp_magic(htonl(np->local_ip), + htonl(np->remote_ip), + udp_len, IPPROTO_UDP, + csum_partial((unsigned char *)udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); @@ -373,7 +340,8 @@ static void arp_reply(struct sk_buff *skb) struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; - u32 sip, tip; + __be32 sip, tip; + unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np = NULL; @@ -400,9 +368,14 @@ static void arp_reply(struct sk_buff *skb) arp->ar_op != htons(ARPOP_REQUEST)) return; - arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); - arp_ptr += 4 + skb->dev->addr_len; + arp_ptr += 4; + /* if we actually cared about dst hw addr, it would get copied here */ + arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ @@ -425,8 +398,8 @@ static void arp_reply(struct sk_buff *skb) if (np->dev->hard_header && np->dev->hard_header(send_skb, skb->dev, ptype, - np->remote_mac, np->local_mac, - send_skb->len) < 0) { + sha, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -449,7 +422,7 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &tip, 4); arp_ptr += 4; - memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); + memcpy(arp_ptr, sha, np->dev->addr_len); arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &sip, 4); @@ -464,7 +437,6 @@ int __netpoll_rx(struct sk_buff *skb) struct netpoll_info *npi = skb->dev->npinfo; struct netpoll *np = npi->rx_np; - if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) @@ -537,47 +509,47 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; - if(*cur != '@') { + if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->local_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->local_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); - if(*cur != '/') { + if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->local_ip=ntohl(in_aton(cur)); - cur=delim; + *delim = 0; + np->local_ip = ntohl(in_aton(cur)); + cur = delim; printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->local_ip)); } cur++; - if ( *cur != ',') { + if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; - *delim=0; + *delim = 0; strlcpy(np->dev_name, cur, sizeof(np->dev_name)); - cur=delim; + cur = delim; } cur++; printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if ( *cur != '@' ) { + if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->remote_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->remote_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); @@ -585,42 +557,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt) /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->remote_ip=ntohl(in_aton(cur)); - cur=delim+1; + *delim = 0; + np->remote_ip = ntohl(in_aton(cur)); + cur = delim + 1; printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); + np->name, HIPQUAD(np->remote_ip)); - if( *cur != 0 ) - { + if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[0]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[0] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[1]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[1] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[2]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[2] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[3]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[3] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[4]=simple_strtol(cur, NULL, 16); - cur=delim+1; - np->remote_mac[5]=simple_strtol(cur, NULL, 16); + *delim = 0; + np->remote_mac[4] = simple_strtol(cur, NULL, 16); + cur = delim + 1; + np->remote_mac[5] = simple_strtol(cur, NULL, 16); } printk(KERN_INFO "%s: remote ethernet address " @@ -647,34 +618,44 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; struct netpoll_info *npinfo; unsigned long flags; + int err; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - return -1; + return -ENODEV; } np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); - if (!npinfo) + if (!npinfo) { + err = -ENOMEM; goto release; + } npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; - npinfo->tries = MAX_RETRIES; + spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); - } else + skb_queue_head_init(&npinfo->txq); + INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); + + atomic_set(&npinfo->refcnt, 1); + } else { npinfo = ndev->npinfo; + atomic_inc(&npinfo->refcnt); + } if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); + err = -ENOTSUPP; goto release; } @@ -685,13 +666,14 @@ int netpoll_setup(struct netpoll *np) np->name, np->dev_name); rtnl_lock(); - if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { + err = dev_open(ndev); + rtnl_unlock(); + + if (err) { printk(KERN_ERR "%s: failed to open %s\n", - np->name, np->dev_name); - rtnl_unlock(); + np->name, ndev->name); goto release; } - rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + 4*HZ; @@ -729,6 +711,7 @@ int netpoll_setup(struct netpoll *np) rcu_read_unlock(); printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); + err = -EDESTADDRREQ; goto release; } @@ -761,9 +744,16 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); np->dev = NULL; dev_put(ndev); - return -1; + return err; } +static int __init netpoll_init(void) +{ + skb_queue_head_init(&skb_pool); + return 0; +} +core_initcall(netpoll_init); + void netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -771,12 +761,25 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; - if (npinfo && npinfo->rx_np == np) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + if (npinfo) { + if (npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + np->dev->npinfo = NULL; + if (atomic_dec_and_test(&npinfo->refcnt)) { + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); + flush_scheduled_work(); + + kfree(npinfo); + } } + dev_put(np->dev); } @@ -803,4 +806,3 @@ EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); EXPORT_SYMBOL(netpoll_poll); -EXPORT_SYMBOL(netpoll_queue); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index dd023fd2830..1897a3a385d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -207,7 +207,7 @@ static struct proc_dir_entry *pg_proc_dir = NULL; #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) struct flow_state { - __u32 cur_daddr; + __be32 cur_daddr; int count; }; @@ -282,10 +282,10 @@ struct pktgen_dev { /* If we're doing ranges, random or incremental, then this * defines the min/max for those ranges. */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ + __be32 saddr_min; /* inclusive, source IP address */ + __be32 saddr_max; /* exclusive, source IP address */ + __be32 daddr_min; /* inclusive, dest IP address */ + __be32 daddr_max; /* exclusive, dest IP address */ __u16 udp_src_min; /* inclusive, source UDP port */ __u16 udp_src_max; /* exclusive, source UDP port */ @@ -317,8 +317,8 @@ struct pktgen_dev { __u32 cur_dst_mac_offset; __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; + __be32 cur_saddr; + __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; __u32 cur_pkt_size; @@ -350,10 +350,10 @@ struct pktgen_dev { }; struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; + __be32 pgh_magic; + __be32 seq_num; + __be32 tv_sec; + __be32 tv_usec; }; struct pktgen_thread { @@ -2160,7 +2160,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for(i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - (pktgen_random() & + ((__force __be32)pktgen_random() & htonl(0x000fffff)); } @@ -2220,29 +2220,25 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { - - if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = - ntohl(pkt_dev-> - daddr_max))) - { + imn = ntohl(pkt_dev->daddr_min); + imx = ntohl(pkt_dev->daddr_max); + if (imn < imx) { __u32 t; + __be32 s; if (pkt_dev->flags & F_IPDST_RND) { - t = ((pktgen_random() % (imx - imn)) + - imn); - t = htonl(t); + t = pktgen_random() % (imx - imn) + imn; + s = htonl(t); - while (LOOPBACK(t) || MULTICAST(t) - || BADCLASS(t) || ZERONET(t) - || LOCAL_MCAST(t)) { - t = ((pktgen_random() % - (imx - imn)) + imn); - t = htonl(t); + while (LOOPBACK(s) || MULTICAST(s) + || BADCLASS(s) || ZERONET(s) + || LOCAL_MCAST(s)) { + t = (pktgen_random() % + (imx - imn)) + imn; + s = htonl(t); } - pkt_dev->cur_daddr = t; - } - - else { + pkt_dev->cur_daddr = s; + } else { t = ntohl(pkt_dev->cur_daddr); t++; if (t > imx) { @@ -2270,7 +2266,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - ((pktgen_random() | + (((__force __be32)pktgen_random() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2304,6 +2300,12 @@ static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) *mpls |= MPLS_STACK_BOTTOM; } +static inline __be16 build_tci(unsigned int id, unsigned int cfi, + unsigned int prio) +{ + return htons(id | (cfi << 12) | (prio << 13)); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2353,16 +2355,16 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, if (pkt_dev->vlan_id != 0xffff) { if(pkt_dev->svlan_id != 0xffff) { svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); - *svlan_tci = htons(pkt_dev->svlan_id); - *svlan_tci |= pkt_dev->svlan_p << 5; - *svlan_tci |= pkt_dev->svlan_cfi << 4; + *svlan_tci = build_tci(pkt_dev->svlan_id, + pkt_dev->svlan_cfi, + pkt_dev->svlan_p); svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); } vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); - *vlan_tci = htons(pkt_dev->vlan_id); - *vlan_tci |= pkt_dev->vlan_p << 5; - *vlan_tci |= pkt_dev->vlan_cfi << 4; + *vlan_tci = build_tci(pkt_dev->vlan_id, + pkt_dev->vlan_cfi, + pkt_dev->vlan_p); vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = __constant_htons(ETH_P_IP); } @@ -2371,7 +2373,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - @@ -2491,7 +2493,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) char suffix[16]; unsigned int prefixlen = 0; unsigned int suffixlen = 0; - __u32 tmp; + __be32 tmp; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2689,16 +2691,16 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (pkt_dev->vlan_id != 0xffff) { if(pkt_dev->svlan_id != 0xffff) { svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); - *svlan_tci = htons(pkt_dev->svlan_id); - *svlan_tci |= pkt_dev->svlan_p << 5; - *svlan_tci |= pkt_dev->svlan_cfi << 4; + *svlan_tci = build_tci(pkt_dev->svlan_id, + pkt_dev->svlan_cfi, + pkt_dev->svlan_p); svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); } vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); - *vlan_tci = htons(pkt_dev->vlan_id); - *vlan_tci |= pkt_dev->vlan_p << 5; - *vlan_tci |= pkt_dev->vlan_cfi << 4; + *vlan_tci = build_tci(pkt_dev->vlan_id, + pkt_dev->vlan_cfi, + pkt_dev->vlan_p); vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6); } @@ -2707,7 +2709,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - @@ -2726,11 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->len = htons(datalen + sizeof(struct udphdr)); udph->check = 0; /* No checksum */ - *(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ + *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ if (pkt_dev->traffic_class) { /* Version + traffic class + flow (0) */ - *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); + *(__be32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); } iph->hop_limit = 32; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 79ebd75fbe4..5f0818d815e 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/vmalloc.h> #include <net/request_sock.h> @@ -29,22 +30,31 @@ * it is absolutely not enough even at 100conn/sec. 256 cures most * of problems. This value is adjusted to 128 for very small machines * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). - * Further increasing requires to change hash table size. + * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; int reqsk_queue_alloc(struct request_sock_queue *queue, - const int nr_table_entries) + unsigned int nr_table_entries) { - const int lopt_size = sizeof(struct listen_sock) + - nr_table_entries * sizeof(struct request_sock *); - struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL); - + size_t lopt_size = sizeof(struct listen_sock); + struct listen_sock *lopt; + + nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); + nr_table_entries = max_t(u32, nr_table_entries, 8); + nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); + lopt_size += nr_table_entries * sizeof(struct request_sock *); + if (lopt_size > PAGE_SIZE) + lopt = __vmalloc(lopt_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); + else + lopt = kzalloc(lopt_size, GFP_KERNEL); if (lopt == NULL) return -ENOMEM; - for (lopt->max_qlen_log = 6; - (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; + for (lopt->max_qlen_log = 3; + (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++); get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); @@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + size_t lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { - int i; + unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; @@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } BUG_TRAP(lopt->qlen == 0); - kfree(lopt); + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); } EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02f3c794789..e76539a5eb5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -108,7 +108,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), }; @@ -213,6 +212,26 @@ nla_put_failure: return nla_nest_cancel(skb, mx); } +int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, + u32 ts, u32 tsage, long expires, u32 error) +{ + struct rta_cacheinfo ci = { + .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_used = dst->__use, + .rta_clntref = atomic_read(&(dst->__refcnt)), + .rta_error = error, + .rta_id = id, + .rta_ts = ts, + .rta_tsage = tsage, + }; + + if (expires) + ci.rta_expires = jiffies_to_clock_t(expires); + + return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); +} + +EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -273,6 +292,25 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +static inline size_t if_nlmsg_size(int iwbuflen) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + + nla_total_size(sizeof(struct rtnl_link_ifmap)) + + nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + + nla_total_size(4) /* IFLA_TXQLEN */ + + nla_total_size(4) /* IFLA_WEIGHT */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(iwbuflen); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, void *iwbuf, int iwbuflen, int type, u32 pid, u32 seq, u32 change, unsigned int flags) @@ -558,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct sk_buff *nskb; char *iw_buf = NULL, *iw = NULL; int iw_buf_len = 0; - int err, payload; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -587,9 +625,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + - nla_total_size(iw_buf_len)); - nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -597,10 +633,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - if (err <= 0) { - kfree_skb(nskb); - goto errout; - } + /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ + BUG_ON(err < 0); err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: @@ -639,15 +673,13 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL); if (skb == NULL) goto errout; err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in if_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3c23760c582..de7801d589e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -56,7 +56,6 @@ #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> -#include <linux/highmem.h> #include <net/protocol.h> #include <net/dst.h> @@ -67,8 +66,10 @@ #include <asm/uaccess.h> #include <asm/system.h> -static kmem_cache_t *skbuff_head_cache __read_mostly; -static kmem_cache_t *skbuff_fclone_cache __read_mostly; +#include "kmap_skb.h" + +static struct kmem_cache *skbuff_head_cache __read_mostly; +static struct kmem_cache *skbuff_fclone_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -131,6 +132,7 @@ EXPORT_SYMBOL(skb_truesize_bug); * @gfp_mask: allocation mask * @fclone: allocate from fclone cache instead of head cache * and allocate a cloned (child) skb + * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -140,9 +142,9 @@ EXPORT_SYMBOL(skb_truesize_bug); * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone) + int fclone, int node) { - kmem_cache_t *cache; + struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; @@ -150,14 +152,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; /* Get the HEAD */ - skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask); + data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), + gfp_mask, node); if (!data) goto nodata; @@ -209,7 +211,7 @@ nodata: * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t gfp_mask) { @@ -266,9 +268,10 @@ nodata: struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask) { + int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1; struct sk_buff *skb; - skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; @@ -473,8 +476,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #endif C(protocol); n->destructor = NULL; + C(mark); #ifdef CONFIG_NETFILTER - C(nfmark); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -534,8 +537,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->pkt_type = old->pkt_type; new->tstamp = old->tstamp; new->destructor = NULL; + new->mark = old->mark; #ifdef CONFIG_NETFILTER - new->nfmark = old->nfmark; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; @@ -639,6 +642,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n->csum = skb->csum; n->ip_summed = skb->ip_summed; + n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; @@ -1239,8 +1243,8 @@ EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ -unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum) +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1264,7 +1268,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1293,7 +1297,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; if (copy > len) copy = len; csum2 = skb_checksum(list, offset - start, @@ -1314,8 +1318,8 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, /* Both of above in one bottle. */ -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, unsigned int csum) +__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, + u8 *to, int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1341,7 +1345,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1367,7 +1371,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list; list = list->next) { - unsigned int csum2; + __wsum csum2; int end; BUG_TRAP(start <= offset + len); @@ -1395,7 +1399,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { - unsigned int csum; + __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -1413,9 +1417,9 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) skb->len - csstart, 0); if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum; + long csstuff = csstart + skb->csum_offset; - *((unsigned short *)(to + csstuff)) = csum_fold(csum); + *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } @@ -1946,7 +1950,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) do { struct sk_buff *nskb; skb_frag_t *frag; - int hsize, nsize; + int hsize; int k; int size; @@ -1957,11 +1961,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) hsize = skb_headlen(skb) - offset; if (hsize < 0) hsize = 0; - nsize = hsize + doffset; - if (nsize > len + doffset || !sg) - nsize = len + doffset; + if (hsize > len || !sg) + hsize = len; - nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); + nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); if (unlikely(!nskb)) goto err; diff --git a/net/core/sock.c b/net/core/sock.c index d472db4776c..0ed5b4f0bc4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,6 +111,7 @@ #include <linux/poll.h> #include <linux/tcp.h> #include <linux/init.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -270,7 +271,7 @@ out: } EXPORT_SYMBOL(sock_queue_rcv_skb); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb) +int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { int rc = NET_RX_SUCCESS; @@ -279,7 +280,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; - bh_lock_sock(sk); + if (nested) + bh_lock_sock_nested(sk); + else + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { /* * trylock + unlock semantics: @@ -806,24 +810,11 @@ lenout: */ static void inline sock_lock_init(struct sock *sk) { - spin_lock_init(&sk->sk_lock.slock); - sk->sk_lock.owner = NULL; - init_waitqueue_head(&sk->sk_lock.wq); - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); - - /* - * Mark both the sk_lock and the sk_lock.slock as a - * per-address-family lock class: - */ - lockdep_set_class_and_name(&sk->sk_lock.slock, - af_family_slock_keys + sk->sk_family, - af_family_slock_key_strings[sk->sk_family]); - lockdep_init_map(&sk->sk_lock.dep_map, - af_family_key_strings[sk->sk_family], - af_family_keys + sk->sk_family, 0); + sock_lock_init_class_and_name(sk, + af_family_slock_key_strings[sk->sk_family], + af_family_slock_keys + sk->sk_family, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); } /** @@ -837,7 +828,7 @@ struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; - kmem_cache_t *slab = prot->slab; + struct kmem_cache *slab = prot->slab; if (slab != NULL) sk = kmem_cache_alloc(slab, priority); @@ -1160,7 +1151,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, goto failure; if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { - skb = alloc_skb(header_len, sk->sk_allocation); + skb = alloc_skb(header_len, gfp_mask); if (skb) { int npages; int i; @@ -1527,7 +1518,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) atomic_set(&sk->sk_refcnt, 1); } -void fastcall lock_sock(struct sock *sk) +void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -1538,11 +1529,11 @@ void fastcall lock_sock(struct sock *sk) /* * The sk_lock has mutex_lock() semantics here: */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); } -EXPORT_SYMBOL(lock_sock); +EXPORT_SYMBOL(lock_sock_nested); void fastcall release_sock(struct sock *sk) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 02534131d88..1e75b158546 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,10 +21,6 @@ extern __u32 sysctl_rmem_max; extern int sysctl_core_destroy_delay; -#ifdef CONFIG_NET_DIVERT -extern char sysctl_divert_version[]; -#endif /* CONFIG_NET_DIVERT */ - #ifdef CONFIG_XFRM extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; @@ -105,16 +101,6 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, -#ifdef CONFIG_NET_DIVERT - { - .ctl_name = NET_CORE_DIVERT_VERSION, - .procname = "divert_version", - .data = (void *)sysctl_divert_version, - .maxlen = 32, - .mode = 0444, - .proc_handler = &proc_dostring - }, -#endif /* CONFIG_NET_DIVERT */ #ifdef CONFIG_XFRM { .ctl_name = NET_CORE_AEVENT_ETIME, diff --git a/net/core/utils.c b/net/core/utils.c index d93fe64f669..61556065f07 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -88,7 +88,7 @@ EXPORT_SYMBOL(in_aton); #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 -static inline int digit2bin(char c, char delim) +static inline int digit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -99,7 +99,7 @@ static inline int digit2bin(char c, char delim) return IN6PTON_UNKNOWN; } -static inline int xdigit2bin(char c, char delim) +static inline int xdigit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -113,12 +113,14 @@ static inline int xdigit2bin(char c, char delim) return (IN6PTON_XDIGIT | (c - 'a' + 10)); if (c >= 'A' && c <= 'F') return (IN6PTON_XDIGIT | (c - 'A' + 10)); + if (delim == -1) + return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } int in4_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s; u8 *d; @@ -173,7 +175,7 @@ EXPORT_SYMBOL(in4_pton); int in6_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; diff --git a/net/core/wireless.c b/net/core/wireless.c index cb1b8728d7e..f69ab7b4408 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -2130,7 +2130,7 @@ int iw_handler_set_spy(struct net_device * dev, * The rtnl_lock() make sure we don't race with the other iw_handlers. * This make sure wireless_spy_update() "see" that the spy list * is temporarily disabled. */ - wmb(); + smp_wmb(); /* Are there are addresses to copy? */ if(wrqu->data.length > 0) { @@ -2159,7 +2159,7 @@ int iw_handler_set_spy(struct net_device * dev, } /* Make sure above is updated before re-enabling */ - wmb(); + smp_wmb(); /* Enable addresses */ spydata->spy_number = wrqu->data.length; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ef8919cca74..b8a68dd4100 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -38,6 +38,9 @@ config IP_DCCP_DEBUG ---help--- Only use this if you're hacking DCCP. + When compiling DCCP as a module, this debugging output can be toggled + by setting the parameter dccp_debug of the `dccp' module to 0 or 1. + Just say N. config NET_DCCPPROBE @@ -49,7 +52,7 @@ config NET_DCCPPROBE DCCP congestion avoidance modules. If you don't understand what was just said, you don't need it: say N. - Documentation on how to use the packet generator can be found + Documentation on how to use DCCP connection probing can be found at http://linux-net.osdl.org/index.php/DccpProbe To compile this code as a module, choose M here: the diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 17ed99c4661..f4f8793aaff 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,13 +1,13 @@ -obj-$(CONFIG_IPV6) += dccp_ipv6.o - -dccp_ipv6-y := ipv6.o - obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o dccp_ipv4-y := ipv4.o +# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module +obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o +dccp_ipv6-y := ipv6.o + dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index f8208874ac7..a086c6312d3 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -21,8 +21,8 @@ #include <net/sock.h> -static kmem_cache_t *dccp_ackvec_slab; -static kmem_cache_t *dccp_ackvec_record_slab; +static struct kmem_cache *dccp_ackvec_slab; +static struct kmem_cache *dccp_ackvec_record_slab; static struct dccp_ackvec_record *dccp_ackvec_record_new(void) { @@ -67,15 +67,16 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT tx: " : "server tx: "; -#endif struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - int len = av->dccpav_vec_len + 2; + /* Figure out how many options do we need to represent the ackvec */ + const u16 nr_opts = (av->dccpav_vec_len + + DCCP_MAX_ACKVEC_OPT_LEN - 1) / + DCCP_MAX_ACKVEC_OPT_LEN; + u16 len = av->dccpav_vec_len + 2 * nr_opts, i; struct timeval now; u32 elapsed_time; - unsigned char *to, *from; + const unsigned char *tail, *from; + unsigned char *to; struct dccp_ackvec_record *avr; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) @@ -94,24 +95,37 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_opt_len += len; - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - + to = skb_push(skb, len); len = av->dccpav_vec_len; from = av->dccpav_buf + av->dccpav_buf_head; + tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN; + + for (i = 0; i < nr_opts; ++i) { + int copylen = len; + + if (len > DCCP_MAX_ACKVEC_OPT_LEN) + copylen = DCCP_MAX_ACKVEC_OPT_LEN; - /* Check if buf_head wraps */ - if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) { - const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head; + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = copylen + 2; - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = av->dccpav_buf; + /* Check if buf_head wraps */ + if (from + copylen > tail) { + const u16 tailsize = tail - from; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + copylen -= tailsize; + from = av->dccpav_buf; + } + + memcpy(to, from, copylen); + from += copylen; + to += copylen; + len -= copylen; } - memcpy(to, from, len); /* * From RFC 4340, A.2: * @@ -129,9 +143,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) dccp_ackvec_insert_avr(av, avr); - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " "ack_ackno=%llu\n", - debug_prefix, avr->dccpavr_sent_len, + dccp_role(sk), avr->dccpavr_sent_len, (unsigned long long)avr->dccpavr_ack_seqno, (unsigned long long)avr->dccpavr_ack_ackno); return 0; @@ -145,7 +159,6 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_ack_ptr = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; av->dccpav_vec_len = 0; @@ -174,13 +187,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av) } static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, - const u8 index) + const u32 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; } static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, - const u8 index) + const u32 index) { return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; } @@ -210,7 +223,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, gap = -new_head; } new_head += DCCP_MAX_ACKVEC_LEN; - } + } av->dccpav_buf_head = new_head; @@ -280,7 +293,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, * could reduce the complexity of this scan.) */ u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); - u8 index = av->dccpav_buf_head; + u32 index = av->dccpav_buf_head; while (1) { const u8 len = dccp_ackvec_len(av, index); @@ -322,21 +335,18 @@ out_duplicate: #ifdef CONFIG_IP_DCCP_DEBUG void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); + dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); while (len--) { const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - printk("%d,%d|", state, rl); + dccp_pr_debug_cat("%d,%d|", state, rl); ++vector; } - printk("\n"); + dccp_pr_debug_cat("\n"); } void dccp_ackvec_print(const struct dccp_ackvec *av) @@ -380,24 +390,20 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, */ list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) { if (ackno == avr->dccpavr_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, + dccp_role(sk), 1, (unsigned long long)avr->dccpavr_ack_seqno, (unsigned long long)avr->dccpavr_ack_ackno); dccp_ackvec_throw_record(av, avr); break; - } + } else if (avr->dccpavr_ack_seqno > ackno) + break; /* old news */ } } static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, - struct sock *sk, u64 ackno, + struct sock *sk, u64 *ackno, const unsigned char len, const unsigned char *vector) { @@ -420,7 +426,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; u64 ackno_end_rl; - dccp_set_seqno(&ackno_end_rl, ackno - rl); + dccp_set_seqno(&ackno_end_rl, *ackno - rl); /* * If our AVR sequence number is greater than the ack, go @@ -428,25 +434,19 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, */ list_for_each_entry_from(avr, &av->dccpav_records, dccpavr_node) { - if (!after48(avr->dccpavr_ack_seqno, ackno)) + if (!after48(avr->dccpavr_ack_seqno, *ackno)) goto found; } /* End of the dccpav_records list, not found, exit */ break; found: - if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { + if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) { const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " + dccp_pr_debug("%s ACK vector 0, len=%d, " "ack_seqno=%llu, ack_ackno=%llu, " "ACKED!\n", - debug_prefix, len, + dccp_role(sk), len, (unsigned long long) avr->dccpavr_ack_seqno, (unsigned long long) @@ -460,27 +460,23 @@ found: */ } - dccp_set_seqno(&ackno, ackno_end_rl - 1); + dccp_set_seqno(ackno, ackno_end_rl - 1); ++vector; } } int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) + u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - if (len > DCCP_MAX_ACKVEC_LEN) + if (len > DCCP_MAX_ACKVEC_OPT_LEN) return -1; /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + ackno, len, value); return 0; } -static char dccp_ackvec_slab_msg[] __initdata = - KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n"; - int __init dccp_ackvec_init(void) { dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", @@ -502,7 +498,7 @@ out_destroy_slab: kmem_cache_destroy(dccp_ackvec_slab); dccp_ackvec_slab = NULL; out_err: - printk(dccp_ackvec_slab_msg); + DCCP_CRIT("Unable to create Ack Vector slab cache"); return -ENOBUFS; } diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index cf8f20ce23a..96504a3b16e 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -17,7 +17,9 @@ #include <linux/types.h> /* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_LEN 253 +#define DCCP_MAX_ACKVEC_OPT_LEN 253 +/* We can spread an ack vector across multiple options */ +#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) @@ -41,7 +43,6 @@ * Ack Vectors it has recently sent. For each packet sent carrying an * Ack Vector, it remembers four variables: * - * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. * @dccpav_records - list of dccp_ackvec_record * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. * @@ -52,9 +53,8 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; struct timeval dccpav_time; - u8 dccpav_buf_head; - u8 dccpav_ack_ptr; - u8 dccpav_vec_len; + u16 dccpav_buf_head; + u16 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; @@ -77,9 +77,9 @@ struct dccp_ackvec_record { struct list_head dccpavr_node; u64 dccpavr_ack_seqno; u64 dccpavr_ack_ackno; - u8 dccpavr_ack_ptr; + u16 dccpavr_ack_ptr; + u16 dccpavr_sent_len; u8 dccpavr_ack_nonce; - u8 dccpavr_sent_len; }; struct sock; @@ -98,7 +98,8 @@ extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, const u64 ackno); extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len); + u64 *ackno, const u8 opt, + const u8 *value, const u8 len); extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); @@ -137,7 +138,8 @@ static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, } static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) + const u64 *ackno, const u8 opt, + const u8 *value, const u8 len) { return -1; } diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index ff05e59043c..d8cf92f09e6 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void) #define ccids_read_unlock() do { } while(0) #endif -static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) +static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { - kmem_cache_t *slab; + struct kmem_cache *slab; char slab_name_fmt[32], *slab_name; va_list args; @@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) return slab; } -static void ccid_kmem_cache_destroy(kmem_cache_t *slab) +static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { if (slab != NULL) { const char *name = kmem_cache_name(slab); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index f7eb6c61341..c65cb2453e4 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -27,9 +27,9 @@ struct ccid_operations { unsigned char ccid_id; const char *ccid_name; struct module *ccid_owner; - kmem_cache_t *ccid_hc_rx_slab; + struct kmem_cache *ccid_hc_rx_slab; __u32 ccid_hc_rx_obj_size; - kmem_cache_t *ccid_hc_tx_slab; + struct kmem_cache *ccid_hc_tx_slab; __u32 ccid_hc_tx_obj_size; int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); @@ -43,8 +43,6 @@ struct ccid_operations { unsigned char* value); int (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); - int (*ccid_hc_tx_insert_options)(struct sock *sk, - struct sk_buff *skb); void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, @@ -52,9 +50,9 @@ struct ccid_operations { unsigned char len, u16 idx, unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, - struct sk_buff *skb, int len); - void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, - int len); + struct sk_buff *skb); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, + int more, unsigned int len); void (*ccid_hc_rx_get_info)(struct sock *sk, struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, @@ -94,16 +92,16 @@ extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb, int len) + struct sk_buff *skb) { int rc = 0; if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) - rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb, len); + rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); return rc; } static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, - int more, int len) + int more, unsigned int len) { if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len); @@ -146,14 +144,6 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, return rc; } -static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL) - return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb); - return 0; -} - static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 8533dabfb9f..80f46988769 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -28,13 +28,20 @@ config IP_DCCP_CCID2 This text was extracted from RFC 4340 (sec. 10.1), http://www.ietf.org/rfc/rfc4340.txt + To compile this CCID as a module, choose M here: the module will be + called dccp_ccid2. + If in doubt, say M. config IP_DCCP_CCID2_DEBUG - bool "CCID2 debug" + bool "CCID2 debugging messages" depends on IP_DCCP_CCID2 ---help--- - Enable CCID2 debug messages. + Enable CCID2-specific debugging messages. + + When compiling CCID2 as a module, this debugging output can + additionally be toggled by setting the ccid2_debug module + parameter to 0 or 1. If in doubt, say N. @@ -62,10 +69,57 @@ config IP_DCCP_CCID3 This text was extracted from RFC 4340 (sec. 10.2), http://www.ietf.org/rfc/rfc4340.txt + To compile this CCID as a module, choose M here: the module will be + called dccp_ccid3. + If in doubt, say M. config IP_DCCP_TFRC_LIB depends on IP_DCCP_CCID3 def_tristate IP_DCCP_CCID3 +config IP_DCCP_CCID3_DEBUG + bool "CCID3 debugging messages" + depends on IP_DCCP_CCID3 + ---help--- + Enable CCID3-specific debugging messages. + + When compiling CCID3 as a module, this debugging output can + additionally be toggled by setting the ccid3_debug module + parameter to 0 or 1. + + If in doubt, say N. + +config IP_DCCP_CCID3_RTO + int "Use higher bound for nofeedback timer" + default 100 + depends on IP_DCCP_CCID3 && EXPERIMENTAL + ---help--- + Use higher lower bound for nofeedback timer expiration. + + The TFRC nofeedback timer normally expires after the maximum of 4 + RTTs and twice the current send interval (RFC 3448, 4.3). On LANs + with a small RTT this can mean a high processing load and reduced + performance, since then the nofeedback timer is triggered very + frequently. + + This option enables to set a higher lower bound for the nofeedback + value. Values in units of milliseconds can be set here. + + A value of 0 disables this feature by enforcing the value specified + in RFC 3448. The following values have been suggested as bounds for + experimental use: + * 16-20ms to match the typical multimedia inter-frame interval + * 100ms as a reasonable compromise [default] + * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4) + + The default of 100ms is a compromise between a large value for + efficient DCCP implementations, and a small value to avoid disrupting + the network in times of congestion. + + The purpose of the nofeedback timer is to slow DCCP down when there + is serious network congestion: experimenting with larger values should + therefore not be performed on WANs. + + endmenu diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 2fbb84bf4e2..fd38b05d6f7 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -33,18 +33,11 @@ #include "../dccp.h" #include "ccid2.h" -static int ccid2_debug; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -#define ccid2_pr_debug(format, a...) \ - do { if (ccid2_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ - } while (0) -#else -#define ccid2_pr_debug(format, a...) -#endif +static int ccid2_debug; +#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; @@ -86,7 +79,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); } #else -#define ccid2_hc_tx_check_sanity(hctx) do {} while (0) +#define ccid2_pr_debug(format, a...) +#define ccid2_hc_tx_check_sanity(hctx) #endif static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, @@ -131,8 +125,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, return 0; } -static int ccid2_hc_tx_send_packet(struct sock *sk, - struct sk_buff *skb, int len) +static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct ccid2_hc_tx_sock *hctx; @@ -274,7 +267,7 @@ static void ccid2_start_rto_timer(struct sock *sk) jiffies + hctx->ccid2hctx_rto); } -static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) +static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); @@ -352,14 +345,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) #ifdef CONFIG_IP_DCCP_CCID2_DEBUG ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); - ccid2_pr_debug("Sent: seq=%llu\n", seq); + ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq); do { struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; while (seqp != hctx->ccid2hctx_seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", - seqp->ccid2s_seq, seqp->ccid2s_acked, - seqp->ccid2s_sent); + (unsigned long long)seqp->ccid2s_seq, + seqp->ccid2s_acked, seqp->ccid2s_sent); seqp = seqp->ccid2s_next; } } while (0); @@ -426,7 +419,7 @@ static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, return -1; out_invalid_option: - BUG_ON(1); /* should never happen... options were previously parsed ! */ + DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); return -1; } @@ -480,7 +473,8 @@ static inline void ccid2_new_ack(struct sock *sk, /* first measurement */ if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", - r, jiffies, seqp->ccid2s_seq); + r, jiffies, + (unsigned long long)seqp->ccid2s_seq); ccid2_change_srtt(hctx, r); hctx->ccid2hctx_rttvar = r >> 1; } else { @@ -524,8 +518,8 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_lastrtt = jiffies; ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, - hctx->ccid2hctx_rto, HZ, r); + hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, + hctx->ccid2hctx_rto, HZ, r); hctx->ccid2hctx_sent = 0; } @@ -618,7 +612,17 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (after48(ackno, hctx->ccid2hctx_high_ack)) + hctx->ccid2hctx_high_ack = ackno; + + seqp = hctx->ccid2hctx_seqt; + while (before48(seqp->ccid2s_seq, ackno)) { + seqp = seqp->ccid2s_next; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + break; + } + } /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for * this single ack. I round up. @@ -636,8 +640,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) u64 ackno_end_rl; dccp_set_seqno(&ackno_end_rl, ackno - rl); - ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno, - ackno_end_rl); + ccid2_pr_debug("ackvec start:%llu end:%llu\n", + (unsigned long long)ackno, + (unsigned long long)ackno_end_rl); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. @@ -662,9 +667,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == + if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - ccid2_congestion_event(hctx, + ccid2_congestion_event(hctx, seqp); } else ccid2_new_ack(sk, seqp, @@ -672,7 +677,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) seqp->ccid2s_acked = 1; ccid2_pr_debug("Got ack for %llu\n", - seqp->ccid2s_seq); + (unsigned long long)seqp->ccid2s_seq); ccid2_hc_tx_dec_pipe(sk); } if (seqp == hctx->ccid2hctx_seqt) { @@ -695,7 +700,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* The state about what is acked should be correct now * Check for NUMDUPACK */ - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + seqp = hctx->ccid2hctx_seqt; + while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { + seqp = seqp->ccid2s_next; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + break; + } + } done = 0; while (1) { if (seqp->ccid2s_acked) { @@ -718,7 +730,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) while (1) { if (!seqp->ccid2s_acked) { ccid2_pr_debug("Packet lost: %llu\n", - seqp->ccid2s_seq); + (unsigned long long)seqp->ccid2s_seq); /* XXX need to traverse from tail -> head in * order to detect multiple congestion events in * one ack vector. @@ -769,6 +781,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; hctx->ccid2hctx_last_cong = jiffies; + hctx->ccid2hctx_high_ack = 0; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; @@ -821,8 +834,10 @@ static struct ccid_operations ccid2 = { .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG module_param(ccid2_debug, int, 0444); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); +#endif static __init int ccid2_module_init(void) { diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 5b2ef4acb30..ebd79499c85 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -35,7 +35,7 @@ struct ccid2_seq { struct ccid2_seq *ccid2s_next; }; -#define CCID2_SEQBUF_LEN 256 +#define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 /** struct ccid2_hc_tx_sock - CCID2 TX half connection @@ -72,6 +72,7 @@ struct ccid2_hc_tx_sock { int ccid2hctx_rpdupack; int ccid2hctx_sendwait; unsigned long ccid2hctx_last_cong; + u64 ccid2hctx_high_ack; }; struct ccid2_hc_rx_sock { diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cec23ad286d..40402c59506 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -41,32 +41,9 @@ #include "lib/tfrc.h" #include "ccid3.h" -/* - * Reason for maths here is to avoid 32 bit overflow when a is big. - * With this we get close to the limit. - */ -static u32 usecs_div(const u32 a, const u32 b) -{ - const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : - a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : - a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : - a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : - a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : - a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : - a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : - a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : - 100000; - const u32 tmp = a * (USEC_PER_SEC / div); - return (b >= 2 * div) ? tmp / (b / div) : tmp; -} - +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG static int ccid3_debug; - -#ifdef CCID3_DEBUG -#define ccid3_pr_debug(format, a...) \ - do { if (ccid3_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ - } while (0) +#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) #endif @@ -75,15 +52,7 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; static struct dccp_li_hist *ccid3_li_hist; -/* TFRC sender states */ -enum ccid3_hc_tx_states { - TFRC_SSTATE_NO_SENT = 1, - TFRC_SSTATE_NO_FBACK, - TFRC_SSTATE_FBACK, - TFRC_SSTATE_TERM, -}; - -#ifdef CCID3_DEBUG +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { static char *ccid3_state_names[] = { @@ -110,324 +79,319 @@ static void ccid3_hc_tx_set_state(struct sock *sk, hctx->ccid3hctx_state = state; } -/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ -static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +/* + * Recalculate scheduled nominal send time t_nom, inter-packet interval + * t_ipi, and delta value. Should be called after each change to X. + */ +static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) { - /* - * If no feedback spec says t_ipi is 1 second (set elsewhere and then - * doubles after every no feedback timer (separate function) - */ - if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) - hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x); -} + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); -/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ -static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) -{ + /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ + hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x >> 6); + + /* Update nominal send time with regard to the new t_ipi */ + timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + + /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); } - /* * Update X by * If (p > 0) - * x_calc = calcX(s, R, p); + * X_calc = calcX(s, R, p); * X = max(min(X_calc, 2 * X_recv), s / t_mbi); * Else * If (now - tld >= R) * X = max(min(2 * X, 2 * X_recv), s / R); * tld = now; - */ -static void ccid3_hc_tx_update_x(struct sock *sk) + * + * Note: X and X_recv are both stored in units of 64 * bytes/second, to support + * fine-grained resolution of sending rates. This requires scaling by 2^6 + * throughout the code. Only X_calc is unscaled (in bytes/second). + * + * If X has changed, we also update the scheduled send time t_now, + * the inter-packet interval t_ipi, and the delta value. + */ +static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) + { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const __u64 old_x = hctx->ccid3hctx_x; - /* To avoid large error in calcX */ - if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { - hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, - 2 * hctx->ccid3hctx_x_recv), - (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME)); - } else { - struct timeval now; + if (hctx->ccid3hctx_p > 0) { - dccp_timestamp(sk, &now); - if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= - hctx->ccid3hctx_rtt) { - hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, - hctx->ccid3hctx_x) * 2, - usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = now; - } + hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, + hctx->ccid3hctx_x_recv * 2); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); + + } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - + (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + + hctx->ccid3hctx_x = + max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv), + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt)); + hctx->ccid3hctx_t_ld = *now; + } + + if (hctx->ccid3hctx_x != old_x) + ccid3_update_send_time(hctx); +} + +/* + * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) + * @len: DCCP packet payload size in bytes + */ +static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) +{ + if (unlikely(len == 0)) + ccid3_pr_debug("Packet payload length is 0 - not updating\n"); + else + hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len : + (9 * hctx->ccid3hctx_s + len) / 10; + /* + * Note: We could do a potential optimisation here - when `s' changes, + * recalculate sending rate and consequently t_ipi, t_delta, and + * t_now. This is however non-standard, and the benefits are not + * clear, so it is currently left out. + */ +} + +/* + * Update Window Counter using the algorithm from [RFC 4342, 8.1]. + * The algorithm is not applicable if RTT < 4 microseconds. + */ +static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, + struct timeval *now) +{ + suseconds_t delta; + u32 quarter_rtts; + + if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ + return; + + delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); + DCCP_BUG_ON(delta < 0); + + quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + + if (quarter_rtts > 0) { + hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ + + ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); } } static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - unsigned long next_tmout = 0; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ /* XXX: set some sensible MIB */ - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + HZ / 5); - goto out; + goto restart_timer; } - ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); - + switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_TERM: - goto out; case TFRC_SSTATE_NO_FBACK: - /* Halve send rate */ - hctx->ccid3hctx_x /= 2; - if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME)) - hctx->ccid3hctx_x = (hctx->ccid3hctx_s / - TFRC_MAX_BACK_OFF_TIME); - - ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " - "bytes/s\n", - dccp_role(sk), sk, + /* RFC 3448, 4.4: Halve send rate directly */ + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); + + ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u " + "bytes/s\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), - hctx->ccid3hctx_x); - next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x), - TFRC_INITIAL_TIMEOUT); - /* - * FIXME - not sure above calculation is correct. See section - * 5 of CCID3 11 should adjust tx_t_ipi and double that to - * achieve it really - */ + (unsigned)(hctx->ccid3hctx_x >> 6)); + /* The value of R is still undefined and so we can not recompute + * the timout value. Keep initial value as per [RFC 4342, 5]. */ + t_nfb = TFRC_INITIAL_TIMEOUT; + ccid3_update_send_time(hctx); break; case TFRC_SSTATE_FBACK: /* * Check if IDLE since last timeout and recv rate is less than - * 4 packets per RTT + * 4 packets (in units of 64*bytes/sec) per RTT */ if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= - 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { - ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + (hctx->ccid3hctx_x_recv >= 4 * + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt))) { + struct timeval now; + + ccid3_pr_debug("%s(%p, state=%s), not idle\n", dccp_role(sk), sk, - ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* Halve sending rate */ + ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* If (X_calc > 2 * X_recv) + /* + * Modify the cached value of X_recv [RFC 3448, 4.4] + * + * If (p == 0 || X_calc > 2 * X_recv) * X_recv = max(X_recv / 2, s / (2 * t_mbi)); * Else * X_recv = X_calc / 4; + * + * Note that X_recv is scaled by 2^6 while X_calc is not */ - BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && - hctx->ccid3hctx_x_calc == 0); - - /* check also if p is zero -> x_calc is infinity? */ - if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || - hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) - hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, - hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); - else - hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; - - /* Update sending rate */ - ccid3_hc_tx_update_x(sk); + BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); + + if (hctx->ccid3hctx_p == 0 || + (hctx->ccid3hctx_x_calc > + (hctx->ccid3hctx_x_recv >> 5))) { + + hctx->ccid3hctx_x_recv = + max(hctx->ccid3hctx_x_recv / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + (2 * TFRC_T_MBI)); + + if (hctx->ccid3hctx_p == 0) + dccp_timestamp(sk, &now); + } else { + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; + hctx->ccid3hctx_x_recv <<= 4; + } + /* Now recalculate X [RFC 3448, 4.3, step (4)] */ + ccid3_hc_tx_update_x(sk, &now); } /* * Schedule no feedback timer to expire in - * max(4 * R, 2 * s / X) + * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) + * See comments in packet_recv() regarding the value of t_RTO. */ - next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, - 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x)); + t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); break; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + case TFRC_SSTATE_NO_SENT: + DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk); + /* fall through */ + case TFRC_SSTATE_TERM: goto out; } - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); hctx->ccid3hctx_idle = 1; + +restart_timer: + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(t_nfb)); out: bh_unlock_sock(sk); sock_put(sk); } -static int ccid3_hc_tx_send_packet(struct sock *sk, - struct sk_buff *skb, int len) +/* + * returns + * > 0: delay (in msecs) that should pass before actually sending + * = 0: can send immediately + * < 0: error condition; do not send packet + */ +static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct dccp_tx_hist_entry *new_packet; struct timeval now; - long delay; - int rc = -ENOTCONN; + suseconds_t delay; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + BUG_ON(hctx == NULL); - /* Check if pure ACK or Terminating*/ /* - * XXX: We only call this function for DATA and DATAACK, on, these - * packets can have zero length, but why the comment about "pure ACK"? + * This function is called only for Data and DataAck packets. Sending + * zero-sized Data(Ack)s is theoretically possible, but for congestion + * control this case is pathological - ignore it. */ - if (unlikely(len == 0)) - goto out; - - /* See if last packet allocated was not sent */ - new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (new_packet == NULL || new_packet->dccphtx_sent) { - new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, - SLAB_ATOMIC); - - rc = -ENOBUFS; - if (unlikely(new_packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough " - "mem to add to history, send refused\n", - __FUNCTION__, dccp_role(sk), sk); - goto out; - } - - dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); - } + if (unlikely(skb->len == 0)) + return -EBADMSG; dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + (jiffies + + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI; - /* Set nominal send time for initial packet */ + /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */ + ccid3_hc_tx_update_s(hctx, skb->len); + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x <<= 6; + + /* First timeout, according to [RFC 3448, 4.2], is 1 second */ + hctx->ccid3hctx_t_ipi = USEC_PER_SEC; + /* Initial delta: minimum of 0.5 sec and t_gran/2 */ + hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN; + + /* Set t_0 for initial packet */ hctx->ccid3hctx_t_nom = now; - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_delta(hctx); - rc = 0; break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - - hctx->ccid3hctx_delta); - delay /= -1000; - /* divide by -1000 is to convert to ms and get sign right */ - rc = delay > 0 ? delay : 0; - break; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); - rc = -EINVAL; + delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + /* + * Scheduling of packet transmissions [RFC 3448, 4.6] + * + * if (t_now > t_nom - delta) + * // send the packet now + * else + * // send the packet in (t_nom - t_now) milliseconds. + */ + if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) + return delay / 1000L; + + ccid3_hc_tx_update_win_count(hctx, &now); break; + case TFRC_SSTATE_TERM: + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); + return -EINVAL; } - /* Can we send? if so add options and add to packet history */ - if (rc == 0) { - dp->dccps_hc_tx_insert_options = 1; - new_packet->dccphtx_ccval = - DCCP_SKB_CB(skb)->dccpd_ccval = - hctx->ccid3hctx_last_win_count; - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - } -out: - return rc; + /* prepare to send now (add options etc.) */ + dp->dccps_hc_tx_insert_options = 1; + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + + /* set the nominal send time for the next following packet */ + timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); + + return 0; } -static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, + unsigned int len) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; + struct dccp_tx_hist_entry *packet; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); - - dccp_timestamp(sk, &now); - - /* check if we have sent a data packet */ - if (len > 0) { - unsigned long quarter_rtt; - struct dccp_tx_hist_entry *packet; - - packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't " - "exists in history!\n", __FUNCTION__); - return; - } - if (unlikely(packet->dccphtx_sent)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in " - "history!\n", __FUNCTION__); - return; - } - packet->dccphtx_tstamp = now; - packet->dccphtx_seqno = dp->dccps_gss; - /* - * Check if win_count have changed - * Algorithm in "8.1. Window Counter Value" in RFC 4342. - */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); - if (likely(hctx->ccid3hctx_rtt > 8)) - quarter_rtt /= hctx->ccid3hctx_rtt / 4; - - if (quarter_rtt > 0) { - hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + - min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from " - "%u to %u!\n", - dccp_role(sk), sk, - packet->dccphtx_ccval, - hctx->ccid3hctx_last_win_count); - } + BUG_ON(hctx == NULL); - hctx->ccid3hctx_idle = 0; - packet->dccphtx_rtt = hctx->ccid3hctx_rtt; - packet->dccphtx_sent = 1; - } else - ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", - dccp_role(sk), sk, dp->dccps_gss); + ccid3_hc_tx_update_s(hctx, len); - switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_NO_SENT: - /* if first wasn't pure ack */ - if (len != 0) - printk(KERN_CRIT "%s: %s, First packet sent is noted " - "as a data packet\n", - __FUNCTION__, dccp_role(sk)); + packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); + if (unlikely(packet == NULL)) { + DCCP_CRIT("packet history - out of memory!"); return; - case TFRC_SSTATE_NO_FBACK: - case TFRC_SSTATE_FBACK: - if (len > 0) { - timeval_sub_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_t_ipi(hctx); - ccid3_calc_new_delta(hctx); - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - } - break; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); - break; } + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); + + dccp_timestamp(sk, &now); + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; + packet->dccphtx_sent = 1; + hctx->ccid3hctx_idle = 0; } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -437,13 +401,11 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; struct timeval now; - unsigned long next_tmout; - u32 t_elapsed; + unsigned long t_nfb; u32 pinv; - u32 x_recv; - u32 r_sample; + suseconds_t r_sample, t_elapsed; - BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM); + BUG_ON(hctx == NULL); /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || @@ -452,41 +414,49 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) opt_recv = &hctx->ccid3hctx_options_received; - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; - x_recv = opt_recv->ccid3or_receive_rate; - pinv = opt_recv->ccid3or_loss_event_rate; - switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_NO_SENT: - /* FIXME: what to do here? */ - return; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - /* Calculate new round trip sample by - * R_sample = (now - t_recvdata) - t_delay */ - /* get t_recvdata from history */ + /* get packet from history to look up t_recvdata */ packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno " - "%llu(%s) does't exist in history!\n", - __FUNCTION__, dccp_role(sk), sk, + DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " + "in history!\n", dccp_role(sk), sk, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } - /* Update RTT */ + /* Update receive rate in units of 64 * bytes/second */ + hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; + hctx->ccid3hctx_x_recv <<= 6; + + /* Update loss event rate */ + pinv = opt_recv->ccid3or_loss_event_rate; + if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ + hctx->ccid3hctx_p = 0; + else /* can not exceed 100% */ + hctx->ccid3hctx_p = 1000000 / pinv; + dccp_timestamp(sk, &now); - r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); + + /* + * Calculate new round trip sample as per [RFC 3448, 4.3] by + * R_sample = (now - t_recvdata) - t_elapsed + */ + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; + + DCCP_BUG_ON(r_sample < 0); if (unlikely(r_sample <= t_elapsed)) - LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " - "t_elapsed=%uus\n", - __FUNCTION__, r_sample, t_elapsed); + DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n", + (int)r_sample, (int)t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); - /* Update RTT estimate by + /* Update RTT estimate by * If (No feedback recv) * R = R_sample; * Else @@ -495,97 +465,96 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * q is a constant, RFC 3448 recomments 0.9 */ if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - hctx->ccid3hctx_rtt = r_sample; - } else - hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + - r_sample / 10; - - ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " - "r_sample=%us\n", dccp_role(sk), sk, - hctx->ccid3hctx_rtt, r_sample); - - /* Update timeout interval */ - hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - USEC_PER_SEC); + /* + * Larger Initial Windows [RFC 4342, sec. 5] + * We deviate in that we use `s' instead of `MSS'. + */ + __u64 w_init = min(4 * hctx->ccid3hctx_s, + max(2 * hctx->ccid3hctx_s, 4380)); + hctx->ccid3hctx_rtt = r_sample; + hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample); + hctx->ccid3hctx_t_ld = now; - /* Update receive rate */ - hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ + ccid3_update_send_time(hctx); - /* Update loss event rate */ - if (pinv == ~0 || pinv == 0) - hctx->ccid3hctx_p = 0; - else { - hctx->ccid3hctx_p = 1000000 / pinv; + ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " + "R_sample=%dus, X=%u\n", dccp_role(sk), + sk, hctx->ccid3hctx_s, w_init, + (int)r_sample, + (unsigned)(hctx->ccid3hctx_x >> 6)); - if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { - hctx->ccid3hctx_p = TFRC_SMALLEST_P; - ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", - dccp_role(sk), sk); - } + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); + } else { + hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + + (u32)r_sample) / 10; + + /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ + if (hctx->ccid3hctx_p > 0) + hctx->ccid3hctx_x_calc = + tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + ccid3_hc_tx_update_x(sk, &now); + + ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, " + "p=%u, X_calc=%u, X_recv=%u, X=%u\n", + dccp_role(sk), + sk, hctx->ccid3hctx_rtt, (int)r_sample, + hctx->ccid3hctx_s, hctx->ccid3hctx_p, + hctx->ccid3hctx_x_calc, + (unsigned)(hctx->ccid3hctx_x_recv >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6)); } /* unschedule no feedback timer */ sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); - /* Update sending rate */ - ccid3_hc_tx_update_x(sk); - - /* Update next send time */ - timeval_sub_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_t_ipi(hctx); - timeval_add_usecs(&hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); - ccid3_calc_new_delta(hctx); - /* remove all packets older than the one acked from history */ dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); /* - * As we have calculated new ipi, delta, t_nom it is possible that - * we now can send a packet, so wake up dccp_wait_for_ccids. + * As we have calculated new ipi, delta, t_nom it is possible + * that we now can send a packet, so wake up dccp_wait_for_ccid */ sk->sk_write_space(sk); /* + * Update timeout interval for the nofeedback timer. + * We use a configuration option to increase the lower bound. + * This can help avoid triggering the nofeedback timer too + * often ('spinning') on LANs with small RTTs. + */ + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + CONFIG_IP_DCCP_CCID3_RTO * + (USEC_PER_SEC/1000)); + /* * Schedule no feedback timer to expire in - * max(4 * R, 2 * s / X) + * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) */ - next_tmout = max(hctx->ccid3hctx_t_rto, - 2 * usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_x)); - - ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " + t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); + + ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, - usecs_to_jiffies(next_tmout), next_tmout); + dccp_role(sk), + sk, usecs_to_jiffies(t_nfb), t_nfb); - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(t_nfb)); /* set idle flag */ - hctx->ccid3hctx_idle = 1; + hctx->ccid3hctx_idle = 1; break; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); - dump_stack(); + case TFRC_SSTATE_NO_SENT: + /* + * XXX when implementing bidirectional rx/tx check this again + */ + DCCP_WARN("Illegal ACK received - no packet sent\n"); + /* fall through */ + case TFRC_SSTATE_TERM: /* ignore feedback when closing */ break; } } -static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - - BUG_ON(hctx == NULL); - - if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN) - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; - return 0; -} - static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char *value) @@ -610,13 +579,14 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (unlikely(len != 4)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " - "len for TFRC_OPT_LOSS_EVENT_RATE\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), invalid len %d " + "for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + opt_recv->ccid3or_loss_event_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); } @@ -624,20 +594,21 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, case TFRC_OPT_LOSS_INTERVALS: opt_recv->ccid3or_loss_intervals_idx = idx; opt_recv->ccid3or_loss_intervals_len = len; - ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_intervals_idx, opt_recv->ccid3or_loss_intervals_len); break; case TFRC_OPT_RECEIVE_RATE: if (unlikely(len != 4)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid " - "len for TFRC_OPT_RECEIVE_RATE\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), invalid len %d " + "for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + opt_recv->ccid3or_receive_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); } @@ -649,22 +620,15 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); - if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) - hctx->ccid3hctx_s = dp->dccps_packet_size; - else - hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; - - /* Set transmission rate to 1 packet per second */ - hctx->ccid3hctx_x = hctx->ccid3hctx_s; - hctx->ccid3hctx_t_rto = USEC_PER_SEC; + hctx->ccid3hctx_s = 0; + hctx->ccid3hctx_rtt = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.function = + ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; init_timer(&hctx->ccid3hctx_no_feedback_timer); @@ -688,14 +652,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) * RX Half Connection methods */ -/* TFRC receiver states */ -enum ccid3_hc_rx_states { - TFRC_RSTATE_NO_DATA = 1, - TFRC_RSTATE_DATA, - TFRC_RSTATE_TERM = 127, -}; - -#ifdef CCID3_DEBUG +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { static char *ccid3_rx_state_names[] = { @@ -721,14 +678,24 @@ static void ccid3_hc_rx_set_state(struct sock *sk, hcrx->ccid3hcrx_state = state; } +static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) +{ + if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */ + ccid3_pr_debug("Packet payload length is 0 - not updating\n"); + else + hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len : + (9 * hcrx->ccid3hcrx_s + len) / 10; +} + static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; struct timeval now; + suseconds_t delta; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); dccp_timestamp(sk, &now); @@ -736,25 +703,22 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; - case TFRC_RSTATE_DATA: { - const u32 delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); - hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, - delta); - } + case TFRC_RSTATE_DATA: + delta = timeval_delta(&now, + &hcrx->ccid3hcrx_tstamp_last_feedback); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_x_recv = + scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); break; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); - dump_stack(); + case TFRC_RSTATE_TERM: + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet " - "in history!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), no data packet in history!\n", + dccp_role(sk), sk); return; } @@ -762,13 +726,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; - /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = - timeval_delta(&now, &packet->dccphrx_tstamp) / 10; + /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ + delta = timeval_delta(&now, &packet->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_elapsed_time = delta / 10; + if (hcrx->ccid3hcrx_p == 0) - hcrx->ccid3hcrx_pinv = ~0; - else + hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ + else if (hcrx->ccid3hcrx_p > 1000000) { + DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p); + hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */ + } else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } @@ -796,9 +766,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_elapsed_time)) || dccp_insert_option_timestamp(sk, skb) || dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, - &pinv, sizeof(pinv)) || + &pinv, sizeof(pinv)) || dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, - &x_recv, sizeof(x_recv))) + &x_recv, sizeof(x_recv))) return -1; return 0; @@ -812,12 +782,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 rtt, delta, x_recv, fval, p, tmp2; + u32 x_recv, p; + suseconds_t rtt, delta; struct timeval tstamp = { 0, }; int interval = 0; int win_count = 0; int step = 0; - u64 tmp1; + u64 fval; list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, dccphrx_node) { @@ -842,58 +813,66 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (unlikely(step == 0)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history " - "contains no data packets!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), packet history has no data packets!\n", + dccp_role(sk), sk); return ~0; } if (unlikely(interval == 0)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a " - "win_count interval > 0. Defaulting to 1\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." + "Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } found: if (!tail) { - LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", - __FUNCTION__); + DCCP_CRIT("tail is null\n"); return ~0; } - rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; - ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", - dccp_role(sk), sk, rtt); - if (rtt == 0) - rtt = 1; - - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); - if (x_recv == 0) - x_recv = hcrx->ccid3hcrx_x_recv; + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); - tmp1 = (u64)x_recv * (u64)rtt; - do_div(tmp1,10000000); - tmp2 = (u32)tmp1; + rtt = delta * 4 / interval; + ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); - if (!tmp2) { - LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " - "%s: x_recv = %u, rtt =%u\n", - __FUNCTION__, x_recv, rtt); + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); return ~0; } - fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; - /* do not alter order above or you will get overflow on 32 bit */ + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } + } + + fval = scaled_div(hcrx->ccid3hcrx_s, rtt); + fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); - ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " + + ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); if (p == 0) return ~0; else - return 1000000 / p; + return 1000000 / p; } static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) @@ -923,11 +902,10 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) /* new loss event detected */ /* calculate last interval length */ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); if (entry == NULL) { - printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); - dump_stack(); + DCCP_BUG("out of memory - can not allocate entry"); return; } @@ -948,7 +926,8 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + struct dccp_rx_hist_entry *rx_hist = + dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); u64 seqno = packet->dccphrx_seqno; u64 tmp_seqno; int loss = 0; @@ -976,7 +955,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_inc_seqno(&tmp_seqno); while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, tmp_seqno, &ccval)) { - hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; hcrx->ccid3hcrx_ccval_nonloss = ccval; dccp_inc_seqno(&tmp_seqno); } @@ -1002,13 +981,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u8 win_count; - u32 p_prev, rtt_prev, r_sample, t_elapsed; - int loss; + u32 p_prev, rtt_prev; + suseconds_t r_sample, t_elapsed; + int loss, payload_size; - BUG_ON(hcrx == NULL || - !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || - hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); + BUG_ON(hcrx == NULL); opt_recv = &dccp_sk(sk)->dccps_options_received; @@ -1025,12 +1002,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample = timeval_usecs(&now); t_elapsed = opt_recv->dccpor_elapsed_time * 10; + DCCP_BUG_ON(r_sample < 0); if (unlikely(r_sample <= t_elapsed)) - LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, " - "t_elapsed=%uus\n", - __FUNCTION__, r_sample, t_elapsed); + DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n", + r_sample, t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -1039,8 +1017,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample / 10; if (rtt_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", - dccp_role(sk), hcrx->ccid3hcrx_rtt, + ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; case DCCP_PKT_DATA: @@ -1050,52 +1028,48 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, - skb, SLAB_ATOMIC); + skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { - LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to " - "add rx packet to history, consider it lost!\n", - __FUNCTION__, dccp_role(sk), sk); + DCCP_WARN("%s(%p), Not enough mem to add rx packet " + "to history, consider it lost!\n", dccp_role(sk), sk); return; } - win_count = packet->dccphrx_ccval; - loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; + payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; + ccid3_hc_rx_update_s(hcrx, payload_size); + switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " - "feedback\n", - dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " + "feedback\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); ccid3_hc_rx_send_feedback(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); return; case TFRC_RSTATE_DATA: - hcrx->ccid3hcrx_bytes_recv += skb->len - - dccp_hdr(skb)->dccph_doff * 4; + hcrx->ccid3hcrx_bytes_recv += payload_size; if (loss) break; dccp_timestamp(sk, &now); - if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= - hcrx->ccid3hcrx_rtt) { + if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - + (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } return; - default: - printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", - __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); - dump_stack(); + case TFRC_RSTATE_TERM: + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } /* Dealing with packet loss */ - ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", + ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); p_prev = hcrx->ccid3hcrx_p; @@ -1107,10 +1081,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; - } else { - printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); - dump_stack(); - } + } else + DCCP_BUG("empty loss history"); if (hcrx->ccid3hcrx_p > p_prev) { ccid3_hc_rx_send_feedback(sk); @@ -1120,23 +1092,17 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - - if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) - hcrx->ccid3hcrx_s = dp->dccps_packet_size; - else - hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; + ccid3_pr_debug("entry\n"); hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; - hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ + hcrx->ccid3hcrx_s = 0; + hcrx->ccid3hcrx_rtt = 0; return 0; } @@ -1165,9 +1131,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) BUG_ON(hcrx == NULL); - info->tcpi_ca_state = hcrx->ccid3hcrx_state; - info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; + info->tcpi_ca_state = hcrx->ccid3hcrx_state; + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; } static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) @@ -1248,7 +1214,6 @@ static struct ccid_operations ccid3 = { .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, - .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), .ccid_hc_rx_init = ccid3_hc_rx_init, @@ -1260,9 +1225,11 @@ static struct ccid_operations ccid3 = { .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; - + +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, int, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); +#endif static __init int ccid3_module_init(void) { @@ -1281,7 +1248,7 @@ static __init int ccid3_module_init(void) goto out_free_tx; rc = ccid_register(&ccid3); - if (rc != 0) + if (rc != 0) goto out_free_loss_interval_history; out: return rc; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 0a2cb7536d2..15776a88c09 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -42,22 +42,24 @@ #include <linux/tfrc.h> #include "../ccid.h" -#define TFRC_MIN_PACKET_SIZE 16 -#define TFRC_STD_PACKET_SIZE 256 -#define TFRC_MAX_PACKET_SIZE 65535 - -/* Two seconds as per CCID3 spec */ +/* Two seconds as per RFC 3448 4.2 */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) -#define TFRC_INITIAL_IPI (USEC_PER_SEC / 4) - /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -/* In seconds */ -#define TFRC_MAX_BACK_OFF_TIME 64 +/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ +#define TFRC_T_MBI 64 + +/* What we think is a reasonable upper limit on RTT values */ +#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC)) -#define TFRC_SMALLEST_P 40 +#define CCID3_RTT_SANITY_CHECK(rtt) do { \ + if (rtt > CCID3_SANE_RTT_MAX) { \ + DCCP_CRIT("RTT (%d) too large, substituting %d", \ + (int)rtt, (int)CCID3_SANE_RTT_MAX); \ + rtt = CCID3_SANE_RTT_MAX; \ + } } while (0) enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, @@ -73,26 +75,36 @@ struct ccid3_options_received { u32 ccid3or_receive_rate; }; -/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + +/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket * - * @ccid3hctx_state - Sender state - * @ccid3hctx_x - Current sending rate - * @ccid3hctx_x_recv - Receive rate - * @ccid3hctx_x_calc - Calculated send (?) rate - * @ccid3hctx_s - Packet size - * @ccid3hctx_rtt - Estimate of current round trip time in usecs - * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 - * @ccid3hctx_last_win_count - Last window counter sent - * @ccid3hctx_t_last_win_count - Timestamp of earliest packet - * with last_win_count value sent - * @ccid3hctx_no_feedback_timer - Handle to no feedback timer - * @ccid3hctx_idle - FIXME - * @ccid3hctx_t_ld - Time last doubled during slow start - * @ccid3hctx_t_nom - Nominal send time of next packet - * @ccid3hctx_t_ipi - Interpacket (send) interval - * @ccid3hctx_delta - Send timer delta - * @ccid3hctx_hist - Packet history - */ + * @ccid3hctx_x - Current sending rate in 64 * bytes per second + * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second + * @ccid3hctx_x_calc - Calculated rate in bytes per second + * @ccid3hctx_rtt - Estimate of current round trip time in usecs + * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 + * @ccid3hctx_s - Packet size in bytes + * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs + * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs + * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states + * @ccid3hctx_last_win_count - Last window counter sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet + * with last_win_count value sent + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer + * @ccid3hctx_idle - Flag indicating that sender is idling + * @ccid3hctx_t_ld - Time last doubled during slow start + * @ccid3hctx_t_nom - Nominal send time of next packet + * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs + * @ccid3hctx_hist - Packet history + * @ccid3hctx_options_received - Parsed set of retrieved options + */ struct ccid3_hc_tx_sock { struct tfrc_tx_info ccid3hctx_tfrc; #define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x @@ -103,7 +115,7 @@ struct ccid3_hc_tx_sock { #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u8 ccid3hctx_state; + enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; struct timeval ccid3hctx_t_last_win_count; @@ -115,23 +127,48 @@ struct ccid3_hc_tx_sock { struct ccid3_options_received ccid3hctx_options_received; }; +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, +}; + +/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket + * + * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) + * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) + * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) + * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number + * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal + * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1) + * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states + * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes + * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent + * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent + * @ccid3hcrx_hist - Packet history + * @ccid3hcrx_li_hist - Loss Interval History + * @ccid3hcrx_s - Received packet size in bytes + * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) + * @ccid3hcrx_elapsed_time - Time since packet reception + */ struct ccid3_hc_rx_sock { - struct tfrc_rx_info ccid3hcrx_tfrc; -#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv -#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt -#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_nonloss:48, - ccid3hcrx_ccval_nonloss:4, - ccid3hcrx_state:8, - ccid3hcrx_ccval_last_counter:4; - u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; - struct list_head ccid3hcrx_hist; - struct list_head ccid3hcrx_li_hist; - u16 ccid3hcrx_s; - u32 ccid3hcrx_pinv; - u32 ccid3hcrx_elapsed_time; + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, + ccid3hcrx_ccval_last_counter:4; + enum ccid3_hc_rx_states ccid3hcrx_state:8; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; + struct list_head ccid3hcrx_hist; + struct list_head ccid3hcrx_li_hist; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 ccid3hcrx_elapsed_time; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 906c81ab9d4..0a0baef16b3 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -13,7 +13,7 @@ #include <linux/module.h> #include <net/sock.h> - +#include "../../dccp.h" #include "loss_interval.h" struct dccp_li_hist *dccp_li_hist_new(const char *name) @@ -109,7 +109,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); if (!w_tot) { - LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + DCCP_WARN("w_tot = 0\n"); return 1; } @@ -125,10 +125,10 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - dump_stack(); + DCCP_BUG("loss interval list entry is NULL"); return 0; } entry->dccplih_interval = ~0; diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 0ae85f0340b..eb257014dd7 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -20,7 +20,7 @@ #define DCCP_LI_HIST_IVAL_F_LENGTH 8 struct dccp_li_hist { - kmem_cache_t *dccplih_slab; + struct kmem_cache *dccplih_slab; }; extern struct dccp_li_hist *dccp_li_hist_new(const char *name); diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index b876c9c81c6..2e8ef42721e 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -36,9 +36,100 @@ #include <linux/module.h> #include <linux/string.h> - #include "packet_history.h" +/* + * Transmitter History Routines + */ +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +/* + * Receiver History Routines + */ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) { struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); @@ -83,18 +174,24 @@ void dccp_rx_hist_delete(struct dccp_rx_hist *hist) EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); -void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) { - struct dccp_rx_hist_entry *entry, *next; + struct dccp_rx_hist_entry *packet = NULL, *entry; - list_for_each_entry_safe(entry, next, list, dccphrx_node) { - list_del_init(&entry->dccphrx_node); - kmem_cache_free(hist->dccprxh_slab, entry); - } -} + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } -EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + if (packet) + *ccval = packet->dccphrx_ccval; + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); struct dccp_rx_hist_entry * dccp_rx_hist_find_data_packet(const struct list_head *list) { @@ -184,110 +281,18 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -struct dccp_tx_hist *dccp_tx_hist_new(const char *name) -{ - struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_tx_hist_mask[] = "tx_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_tx_hist_mask, name); - hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccptxh_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_new); - -void dccp_tx_hist_delete(struct dccp_tx_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccptxh_slab); - - kmem_cache_destroy(hist->dccptxh_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); - -struct dccp_tx_hist_entry * - dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) -{ - struct dccp_tx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphtx_node) - if (entry->dccphtx_seqno == seq) { - packet = entry; - break; - } - - return packet; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); - -int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval) -{ - struct dccp_rx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphrx_node) - if (entry->dccphrx_seqno == seq) { - packet = entry; - break; - } - - if (packet) - *ccval = packet->dccphrx_ccval; - - return packet != NULL; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); - -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, - struct list_head *list, - struct dccp_tx_hist_entry *packet) +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) { - struct dccp_tx_hist_entry *next; + struct dccp_rx_hist_entry *entry, *next; - list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { - list_del_init(&packet->dccphtx_node); - dccp_tx_hist_entry_delete(hist, packet); + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); } } -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); - -void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) -{ - struct dccp_tx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccphtx_node) { - list_del_init(&entry->dccphtx_node); - dccp_tx_hist_entry_delete(hist, entry); - } -} +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 067cf1c85a3..1f960c19ea1 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -49,43 +49,27 @@ #define TFRC_WIN_COUNT_PER_RTT 4 #define TFRC_WIN_COUNT_LIMIT 16 +/* + * Transmitter History data structures and declarations + */ struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, - dccphtx_ccval:4, dccphtx_sent:1; u32 dccphtx_rtt; struct timeval dccphtx_tstamp; }; -struct dccp_rx_hist_entry { - struct list_head dccphrx_node; - u64 dccphrx_seqno:48, - dccphrx_ccval:4, - dccphrx_type:4; - u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; -}; - struct dccp_tx_hist { - kmem_cache_t *dccptxh_slab; + struct kmem_cache *dccptxh_slab; }; extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); -extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); - -struct dccp_rx_hist { - kmem_cache_t *dccprxh_slab; -}; - -extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); -extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); -extern struct dccp_rx_hist_entry * - dccp_rx_hist_find_data_packet(const struct list_head *list); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const gfp_t prio) + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -96,18 +80,20 @@ static inline struct dccp_tx_hist_entry * return entry; } -static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, - struct dccp_tx_hist_entry *entry) +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) { - if (entry != NULL) - kmem_cache_free(hist->dccptxh_slab, entry); + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; } extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); -extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) @@ -115,30 +101,45 @@ static inline void dccp_tx_hist_add_entry(struct list_head *list, list_add(&entry->dccphtx_node, list); } +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *next); -extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, - struct list_head *list); +/* + * Receiver History data structures and declarations + */ +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_ccval:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; -static inline struct dccp_tx_hist_entry * - dccp_tx_hist_head(struct list_head *list) -{ - struct dccp_tx_hist_entry *head = NULL; +struct dccp_rx_hist { + struct kmem_cache *dccprxh_slab; +}; - if (!list_empty(list)) - head = list_entry(list->next, struct dccp_tx_hist_entry, - dccphtx_node); - return head; -} +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, - const u32 ndp, - const struct sk_buff *skb, - const gfp_t prio) + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, + const u32 ndp, + const struct sk_buff *skb, + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); @@ -156,18 +157,8 @@ static inline struct dccp_rx_hist_entry * return entry; } -static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, - struct dccp_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccprxh_slab, entry); -} - -extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, - struct list_head *list); - static inline struct dccp_rx_hist_entry * - dccp_rx_hist_head(struct list_head *list) + dccp_rx_hist_head(struct list_head *list) { struct dccp_rx_hist_entry *head = NULL; @@ -177,6 +168,27 @@ static inline struct dccp_rx_hist_entry * return head; } +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + static inline int dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) { @@ -184,12 +196,6 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, - struct list_head *rx_list, - struct list_head *li_list, - struct dccp_rx_hist_entry *packet, - u64 nonloss_seqno); - extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 45f30f59ea2..faf5f7e219e 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -13,8 +13,29 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - #include <linux/types.h> +#include <asm/div64.h> + +/* integer-arithmetic divisions of type (a * 1000000)/b */ +static inline u64 scaled_div(u64 a, u32 b) +{ + BUG_ON(b==0); + a *= 1000000; + do_div(a, b); + return a; +} + +static inline u32 scaled_div32(u64 a, u32 b) +{ + u64 result = scaled_div(a, b); + + if (result > UINT_MAX) { + DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", + (unsigned long long)a, b); + return UINT_MAX; + } + return result; +} extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 44076e0c659..90009fd77e1 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -13,16 +13,83 @@ */ #include <linux/module.h> - -#include <asm/div64.h> - +#include "../../dccp.h" #include "tfrc.h" #define TFRC_CALC_X_ARRSIZE 500 +#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */ +#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE) -#define TFRC_CALC_X_SPLIT 50000 -/* equivalent to 0.05 */ - +/* + TFRC TCP Reno Throughput Equation Lookup Table for f(p) + + The following two-column lookup table implements a part of the TCP throughput + equation from [RFC 3448, sec. 3.1]: + + s + X_calc = -------------------------------------------------------------- + R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3)) + + Where: + X is the transmit rate in bytes/second + s is the packet size in bytes + R is the round trip time in seconds + p is the loss event rate, between 0 and 1.0, of the number of loss + events as a fraction of the number of packets transmitted + t_RTO is the TCP retransmission timeout value in seconds + b is the number of packets acknowledged by a single TCP ACK + + We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes: + + s + X_calc = ------------------------------------------------------- + R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3)) + + which we can break down into: + + s + X_calc = --------- + R * f(p) + + where f(p) is given for 0 < p <= 1 by: + + f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3) + + Since this is kernel code, floating-point arithmetic is avoided in favour of + integer arithmetic. This means that nearly all fractional parameters are + scaled by 1000000: + * the parameters p and R + * the return result f(p) + The lookup table therefore actually tabulates the following function g(q): + + g(q) = 1000000 * f(q/1000000) + + Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer + granularity for the practically more relevant case of small values of p (up to + 5%), the second column is used; the first one ranges up to 100%. This split + corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also + determines the smallest resolution possible with this lookup table: + + TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE + + The entire table is generated by: + for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) { + lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE); + lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE); + } + + With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1, + lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%) + lookup[M][0] = g(1000000) = 1000000 * f(100%) + lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%) + lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%) + + In summary, the two columns represent f(p) for the following ranges: + * The first column is for 0.002 <= p <= 1.0 + * The second column is for 0.0001 <= p <= 0.05 + Where the columns overlap, the second (finer-grained) is given preference, + i.e. the first column is used only for p >= 0.05. + */ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { { 37172, 8172 }, { 53499, 11567 }, @@ -526,117 +593,105 @@ static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { { 243315981, 271305 } }; -/* Calculate the send rate as per section 3.1 of RFC3448 - -Returns send rate in bytes per second - -Integer maths and lookups are used as not allowed floating point in kernel - -The function for Xcalc as per section 3.1 of RFC3448 is: - -X = s - ------------------------------------------------------------- - R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) - -where -X is the trasmit rate in bytes/second -s is the packet size in bytes -R is the round trip time in seconds -p is the loss event rate, between 0 and 1.0, of the number of loss events - as a fraction of the number of packets transmitted -t_RTO is the TCP retransmission timeout value in seconds -b is the number of packets acknowledged by a single TCP acknowledgement - -we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: - -X = s - ----------------------------------------------------------------------- - R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) - - -which we can break down into: - -X = s - -------- - R * f(p) - -where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) - -Function parameters: -s - bytes -R - RTT in usecs -p - loss rate (decimal fraction multiplied by 1,000,000) - -Returns Xcalc in bytes per second - -DON'T alter this code unless you run test cases against it as the code -has been manipulated to stop underflow/overlow. +/* return largest index i such that fval <= lookup[i][small] */ +static inline u32 tfrc_binsearch(u32 fval, u8 small) +{ + u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1; + + while (low < high) { + try = (low + high) / 2; + if (fval <= tfrc_calc_x_lookup[try][small]) + high = try; + else + low = try + 1; + } + return high; +} -*/ +/** + * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448 + * + * @s: packet size in bytes + * @R: RTT scaled by 1000000 (i.e., microseconds) + * @p: loss ratio estimate scaled by 1000000 + * Returns X_calc in bytes per second (not scaled). + */ u32 tfrc_calc_x(u16 s, u32 R, u32 p) { - int index; + u16 index; u32 f; - u64 tmp1, tmp2; + u64 result; - if (p < TFRC_CALC_X_SPLIT) - index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1; - else - index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1; + /* check against invalid parameters and divide-by-zero */ + BUG_ON(p > 1000000); /* p must not exceed 100% */ + BUG_ON(p == 0); /* f(0) = 0, divide by zero */ + if (R == 0) { /* possible divide by zero */ + DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc."); + return ~0U; + } - if (index < 0) - /* p should be 0 unless there is a bug in my code */ - index = 0; + if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ + if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ + DCCP_WARN("Value of p (%d) below resolution. " + "Substituting %d\n", p, TFRC_SMALLEST_P); + index = 0; + } else /* 0.0001 <= p <= 0.05 */ + index = p/TFRC_SMALLEST_P - 1; - if (R == 0) - R = 1; /* RTT can't be zero or else divide by zero */ + f = tfrc_calc_x_lookup[index][1]; - BUG_ON(index >= TFRC_CALC_X_ARRSIZE); + } else { /* 0.05 < p <= 1.00 */ + index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1; - if (p >= TFRC_CALC_X_SPLIT) f = tfrc_calc_x_lookup[index][0]; - else - f = tfrc_calc_x_lookup[index][1]; - - tmp1 = ((u64)s * 100000000); - tmp2 = ((u64)R * (u64)f); - do_div(tmp2, 10000); - do_div(tmp1, tmp2); - /* Don't alter above math unless you test due to overflow on 32 bit */ - - return (u32)tmp1; + } + + /* + * Compute X = s/(R*f(p)) in bytes per second. + * Since f(p) and R are both scaled by 1000000, we need to multiply by + * 1000000^2. To avoid overflow, the result is computed in two stages. + * This works under almost all reasonable operational conditions, for a + * wide range of parameters. Yet, should some strange combination of + * parameters result in overflow, the use of scaled_div32 will catch + * this and return UINT_MAX - which is a logically adequate consequence. + */ + result = scaled_div(s, R); + return scaled_div32(result, f); } EXPORT_SYMBOL_GPL(tfrc_calc_x); /* - * args: fvalue - function value to match - * returns: p closest to that value + * tfrc_calc_x_reverse_lookup - try to find p given f(p) * - * both fvalue and p are multiplied by 1,000,000 to use ints + * @fvalue: function value to match, scaled by 1000000 + * Returns closest match for p, also scaled by 1000000 */ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) { - int ctr = 0; - int small; + int index; - if (fvalue < tfrc_calc_x_lookup[0][1]) + if (fvalue == 0) /* f(p) = 0 whenever p = 0 */ return 0; - if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) - small = 1; - else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) + /* Error cases. */ + if (fvalue < tfrc_calc_x_lookup[0][1]) { + DCCP_WARN("fvalue %d smaller than resolution\n", fvalue); + return tfrc_calc_x_lookup[0][1]; + } + if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) { + DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue); return 1000000; - else - small = 0; - - while (fvalue > tfrc_calc_x_lookup[ctr][small]) - ctr++; + } - if (small) - return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE; - else - return 1000000 * ctr / TFRC_CALC_X_ARRSIZE; + if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) { + index = tfrc_binsearch(fvalue, 1); + return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE; + } + + /* else ... it must be in the coarse-grained column */ + index = tfrc_binsearch(fvalue, 0); + return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 272e8584564..a0900bf98e6 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -18,15 +18,33 @@ #include <net/tcp.h> #include "ackvec.h" +/* + * DCCP - specific warning and debugging macros. + */ +#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \ + __FUNCTION__, ##a) +#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \ + __FILE__, __LINE__, __FUNCTION__) +#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0) +#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \ + DCCP_BUG("\"%s\" holds (exception!)", \ + __stringify(cond)); \ + } while (0) + +#ifdef MODULE +#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \ + printk(fmt, ##args); \ + } while(0) +#else +#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args) +#endif +#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \ + "%s: " fmt, __FUNCTION__, ##a) + #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; - -#define dccp_pr_debug(format, a...) \ - do { if (dccp_debug) \ - printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ - } while (0) -#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \ - printk(format, ##a); } while (0) +#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) +#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) @@ -35,17 +53,21 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; extern atomic_t dccp_orphan_count; -extern int dccp_tw_count; -extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); extern void dccp_time_wait(struct sock *sk, int state, int timeo); -/* FIXME: Right size this */ -#define DCCP_MAX_OPT_LEN 128 - -#define DCCP_MAX_PACKET_HDR 32 - -#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) +/* + * Set safe upper bounds for header and option length. Since Data Offset is 8 + * bits (RFC 4340, sec. 5.1), the total header length can never be more than + * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1): + * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR + * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields + * Hence a safe upper bound for the maximum option length is 1020-28 = 992 + */ +#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) +#define DCCP_MAX_PACKET_HDR 28 +#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) +#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ @@ -58,6 +80,18 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ +/* sysctl variables for DCCP */ +extern int sysctl_dccp_request_retries; +extern int sysctl_dccp_retries1; +extern int sysctl_dccp_retries2; +extern int sysctl_dccp_feat_sequence_window; +extern int sysctl_dccp_feat_rx_ccid; +extern int sysctl_dccp_feat_tx_ccid; +extern int sysctl_dccp_feat_ack_ratio; +extern int sysctl_dccp_feat_send_ack_vector; +extern int sysctl_dccp_feat_send_ndp_count; +extern int sysctl_dccp_tx_qlen; + /* is seq1 < seq2 ? */ static inline int before48(const u64 seq1, const u64 seq2) { @@ -123,10 +157,36 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_ADD_STATS_USER(field, val) \ SNMP_ADD_STATS_USER(dccp_statistics, field, val) +/* + * Checksumming routines + */ +static inline int dccp_csum_coverage(const struct sk_buff *skb) +{ + const struct dccp_hdr* dh = dccp_hdr(skb); + + if (dh->dccph_cscov == 0) + return skb->len; + return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32); +} + +static inline void dccp_csum_outgoing(struct sk_buff *skb) +{ + int cov = dccp_csum_coverage(skb); + + if (cov >= skb->len) + dccp_hdr(skb)->dccph_cscov = 0; + + skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0); +} + +extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); + extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); extern void dccp_send_ack(struct sock *sk); extern void dccp_send_delayed_ack(struct sock *sk); +extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); + extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); @@ -147,18 +207,7 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -static inline void dccp_openreq_init(struct request_sock *req, - struct dccp_sock *dp, - struct sk_buff *skb) -{ - /* - * FIXME: fill in the other req fields from the DCCP options - * received - */ - inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->acked = 0; - req->rcv_wnd = 0; -} +extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); @@ -217,14 +266,9 @@ extern void dccp_shutdown(struct sock *sk, int how); extern int inet_dccp_listen(struct socket *sock, int backlog); extern unsigned int dccp_poll(struct file *file, struct socket *sock, poll_table *wait); -extern void dccp_v4_send_check(struct sock *sk, int len, - struct sk_buff *skb); extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -extern int dccp_v4_checksum(const struct sk_buff *skb, - const __be32 saddr, const __be32 daddr); - extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); @@ -388,6 +432,7 @@ static inline void timeval_sub_usecs(struct timeval *tv, tv->tv_sec--; tv->tv_usec += USEC_PER_SEC; } + DCCP_BUG_ON(tv->tv_sec < 0); } #ifdef CONFIG_SYSCTL diff --git a/net/dccp/feat.c b/net/dccp/feat.c index a1b0682ee77..95b6927ec65 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,7 +12,6 @@ #include <linux/module.h> -#include "dccp.h" #include "ccid.h" #include "feat.h" @@ -23,9 +22,17 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, { struct dccp_opt_pend *opt; - dccp_pr_debug("feat change type=%d feat=%d\n", type, feature); + dccp_feat_debug(type, feature, *val); - /* XXX sanity check feat change request */ + if (!dccp_feat_is_valid_type(type)) { + DCCP_WARN("option type %d invalid in negotiation\n", type); + return 1; + } + if (!dccp_feat_is_valid_length(type, feature, len)) { + DCCP_WARN("invalid length %d\n", len); + return 1; + } + /* XXX add further sanity checks */ /* check if that feature is already being negotiated */ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { @@ -95,14 +102,14 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr) /* XXX taking only u8 vals */ static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) { - dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val); + dccp_feat_debug(type, feat, val); switch (feat) { case DCCPF_CCID: return dccp_feat_update_ccid(sk, type, val); default: - dccp_pr_debug("IMPLEMENT changing [%d] feat %d to %d\n", - type, feat, val); + dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n", + dccp_feat_typename(type), feat); break; } return 0; @@ -162,7 +169,8 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, break; default: - WARN_ON(1); /* XXX implement res */ + DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat); + /* XXX implement res */ return -EFAULT; } @@ -265,10 +273,10 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) u8 *copy; int rc; - /* NN features must be change L */ - if (type == DCCPO_CHANGE_R) { - dccp_pr_debug("received CHANGE_R %d for NN feat %d\n", - type, feature); + /* NN features must be Change L (sec. 6.3.2) */ + if (type != DCCPO_CHANGE_L) { + dccp_pr_debug("received %s for NN feature %d\n", + dccp_feat_typename(type), feature); return -EFAULT; } @@ -279,12 +287,11 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) if (opt == NULL) return -ENOMEM; - copy = kmalloc(len, GFP_ATOMIC); + copy = kmemdup(val, len, GFP_ATOMIC); if (copy == NULL) { kfree(opt); return -ENOMEM; } - memcpy(copy, val, len); opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */ opt->dccpop_feat = feature; @@ -299,7 +306,8 @@ static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) return rc; } - dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy); + dccp_feat_debug(type, feature, *copy); + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); return 0; @@ -318,14 +326,19 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, return; } - opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R : - DCCPO_CONFIRM_L; + switch (type) { + case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; + case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; + default: DCCP_WARN("invalid type %d\n", type); return; + + } opt->dccpop_feat = feature; opt->dccpop_val = NULL; opt->dccpop_len = 0; /* change feature */ - dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type); + dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature); + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); } @@ -359,7 +372,7 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { int rc; - dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature); + dccp_feat_debug(type, feature, *val); /* figure out if it's SP or NN feature */ switch (feature) { @@ -375,6 +388,8 @@ int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) /* XXX implement other features */ default: + dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n", + dccp_feat_typename(type), feature); rc = -EFAULT; break; } @@ -403,20 +418,27 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, u8 t; struct dccp_opt_pend *opt; struct dccp_minisock *dmsk = dccp_msk(sk); - int rc = 1; + int found = 0; int all_confirmed = 1; - dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature); - - /* XXX sanity check type & feat */ + dccp_feat_debug(type, feature, *val); /* locate our change request */ - t = type == DCCPO_CONFIRM_L ? DCCPO_CHANGE_R : DCCPO_CHANGE_L; + switch (type) { + case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; + case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; + default: DCCP_WARN("invalid type %d\n", type); + return 1; + + } + /* XXX sanity check feature value */ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { if (!opt->dccpop_conf && opt->dccpop_type == t && opt->dccpop_feat == feature) { - /* we found it */ + found = 1; + dccp_pr_debug("feature %d found\n", opt->dccpop_feat); + /* XXX do sanity check */ opt->dccpop_conf = 1; @@ -425,9 +447,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *val); - dccp_pr_debug("feat %d type %d confirmed %d\n", - feature, type, *val); - rc = 0; + /* XXX check the return value of dccp_feat_update */ break; } @@ -446,9 +466,9 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } - if (rc) - dccp_pr_debug("feat %d type %d never requested\n", - feature, type); + if (!found) + dccp_pr_debug("%s(%d, ...) never requested\n", + dccp_feat_typename(type), feature); return 0; } @@ -501,20 +521,18 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) { struct dccp_opt_pend *newopt; /* copy the value of the option */ - u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC); + u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC); if (val == NULL) goto out_clean; - memcpy(val, opt->dccpop_val, opt->dccpop_len); - newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC); + newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC); if (newopt == NULL) { kfree(val); goto out_clean; } /* insert the option */ - memcpy(newopt, opt, sizeof(*newopt)); newopt->dccpop_val = val; list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending); @@ -545,10 +563,9 @@ static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat, u8 *val, u8 len) { int rc = -ENOMEM; - u8 *copy = kmalloc(len, GFP_KERNEL); + u8 *copy = kmemdup(val, len, GFP_KERNEL); if (copy != NULL) { - memcpy(copy, val, len); rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL); if (rc) kfree(copy); @@ -583,3 +600,45 @@ out: } EXPORT_SYMBOL_GPL(dccp_feat_init); + +#ifdef CONFIG_IP_DCCP_DEBUG +const char *dccp_feat_typename(const u8 type) +{ + switch(type) { + case DCCPO_CHANGE_L: return("ChangeL"); + case DCCPO_CONFIRM_L: return("ConfirmL"); + case DCCPO_CHANGE_R: return("ChangeR"); + case DCCPO_CONFIRM_R: return("ConfirmR"); + /* the following case must not appear in feature negotation */ + default: dccp_pr_debug("unknown type %d [BUG!]\n", type); + } + return NULL; +} + +EXPORT_SYMBOL_GPL(dccp_feat_typename); + +const char *dccp_feat_name(const u8 feat) +{ + static const char *feature_names[] = { + [DCCPF_RESERVED] = "Reserved", + [DCCPF_CCID] = "CCID", + [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", + [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", + [DCCPF_ECN_INCAPABLE] = "ECN Incapable", + [DCCPF_ACK_RATIO] = "Ack Ratio", + [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", + [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", + [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", + [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", + }; + if (feat >= DCCPF_MIN_CCID_SPECIFIC) + return "CCID-specific"; + + if (dccp_feat_is_reserved(feat)) + return feature_names[DCCPF_RESERVED]; + + return feature_names[feat]; +} + +EXPORT_SYMBOL_GPL(dccp_feat_name); +#endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/feat.h b/net/dccp/feat.h index cee553d416c..2c373ad7edc 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -12,9 +12,46 @@ */ #include <linux/types.h> +#include "dccp.h" -struct sock; -struct dccp_minisock; +static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len) +{ + /* sec. 6.1: Confirm has at least length 3, + * sec. 6.2: Change has at least length 4 */ + if (len < 3) + return 1; + if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R)) + return 1; + /* XXX: add per-feature length validation (sec. 6.6.8) */ + return 0; +} + +static inline int dccp_feat_is_reserved(const u8 feat) +{ + return (feat > DCCPF_DATA_CHECKSUM && + feat < DCCPF_MIN_CCID_SPECIFIC) || + feat == DCCPF_RESERVED; +} + +/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */ +static inline int dccp_feat_is_valid_type(const u8 optnum) +{ + return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R; + +} + +#ifdef CONFIG_IP_DCCP_DEBUG +extern const char *dccp_feat_typename(const u8 type); +extern const char *dccp_feat_name(const u8 feat); + +static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) +{ + dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type), + dccp_feat_name(feat), feat, val); +} +#else +#define dccp_feat_debug(type, feat, val) +#endif /* CONFIG_IP_DCCP_DEBUG */ extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, u8 *val, u8 len, gfp_t gfp); @@ -26,11 +63,4 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk); extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); extern int dccp_feat_init(struct dccp_minisock *dmsk); -extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; - #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 1d24881ac0a..565bc80557c 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -1,6 +1,6 @@ /* * net/dccp/input.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -82,7 +82,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Otherwise, * Drop packet and return */ - if (dh->dccph_type == DCCP_PKT_SYNC || + if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && @@ -128,21 +128,18 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) DCCP_PKT_WITHOUT_ACK_SEQ)) dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; } else { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, " - "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " - "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " - "sending SYNC...\n", - dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_seq, - (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == + DCCP_WARN("DCCP: Step 6 failed for %s packet, " + "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " + "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " + "sending SYNC...\n", dccp_packet_name(dh->dccph_type), + (unsigned long long) lswl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) dp->dccps_swh, + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dp->dccps_awh); + (unsigned long long) lawl, + (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (unsigned long long) dp->dccps_awh); dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); return -1; } @@ -188,8 +185,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, dccp_rcv_close(sk, skb); return 0; case DCCP_PKT_REQUEST: - /* Step 7 - * or (S.is_server and P.type == Response) + /* Step 7 + * or (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state >= OPEN and P.type == Request * and P.seqno >= S.OSR) @@ -251,8 +248,18 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* + * Deliver to the CCID module in charge. + * FIXME: Currently DCCP operates one-directional only, i.e. a listening + * server is not at the same time a connecting client. There is + * not much sense in delivering to both rx/tx sides at the moment + * (only one is active at a time); when moving to bidirectional + * service, this needs to be revised. + */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); return __dccp_rcv_established(sk, skb, dh, len); discard: @@ -267,7 +274,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, const struct dccp_hdr *dh, const unsigned len) { - /* + /* * Step 4: Prepare sequence numbers in REQUEST * If S.state == REQUEST, * If (P.type == Response or P.type == Reset) @@ -335,7 +342,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer - * Continue with S.state == PARTOPEN + * Continue with S.state == PARTOPEN * / * Step 12 will send the Ack completing the * three-way handshake * / */ @@ -366,7 +373,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, */ __kfree_skb(skb); return 0; - } + } dccp_send_ack(sk); return -1; } @@ -374,7 +381,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - return 1; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -431,29 +438,25 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* * Step 3: Process LISTEN state - * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv) * * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - * - * NOTE: the check for the packet types is done in - * dccp_rcv_state_process + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init + * Cookies Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { @@ -485,14 +488,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* XXX see the comments in dccp_rcv_established about this */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } /* @@ -574,7 +580,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - if (!queued) { + if (!queued) { discard: __kfree_skb(skb); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e08e7688a26..90c74b4adb7 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -113,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_gar = - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - dccp_update_gss(sk, dp->dccps_iss); - + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, + inet->sport, inet->dport); inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); @@ -162,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the + * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) @@ -193,86 +188,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, - struct request_sock *req) -{ - int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) - return; - - skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) - return; - - /* Reserve space for headers. */ - skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); - - skb->dst = dst_clone(rxskb->dst); - - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - bh_lock_sock(dccp_v4_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_v4_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - } -} - -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) -{ - int err = -1; - struct sk_buff *skb; - - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, - ireq->rmt_addr); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - if (err == NET_XMIT_CN) - err = 0; - } - -out: - dst_release(dst); - return err; -} - /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -329,7 +244,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) seq = dccp_hdr_seq(skb); if (sk->sk_state != DCCP_LISTEN && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -429,19 +344,24 @@ out: sock_put(sk); } -/* This routine computes an IPv4 DCCP checksum. */ -void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, + __be32 src, __be32 dst) +{ + return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); +} + +void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); -static inline u64 dccp_v4_init_sequence(const struct sock *sk, - const struct sk_buff *skb) +static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { return secure_dccp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr, @@ -449,95 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } -static struct request_sock_ops dccp_request_sock_ops; - -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct dccp_sock dp; - struct request_sock *req; - struct dccp_request_sock *dreq; - const __be32 saddr = skb->nh.iph->saddr; - const __be32 daddr = skb->nh.iph->daddr; - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - - /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } - - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(&dccp_request_sock_ops); - if (req == NULL) - goto drop; - - if (dccp_parse_options(sk, skb)) - goto drop_and_free; - - dccp_openreq_init(req, &dp, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - req->rcv_wnd = dccp_feat_default_sequence_window; - ireq->opt = NULL; - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. - */ - dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req, NULL)) - goto drop_and_free; - - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - return 0; - -drop_and_free: - reqsk_free(req); -drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - /* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -623,47 +454,6 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr, - const __be32 daddr) -{ - const struct dccp_hdr* dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp); -} - -EXPORT_SYMBOL_GPL(dccp_v4_checksum); - -static int dccp_v4_verify_checksum(struct sk_buff *skb, - const __be32 saddr, const __be32 daddr) -{ - struct dccp_hdr *dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp) == 0 ? 0 : -1; -} - static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct sk_buff *skb) { @@ -677,7 +467,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } - } + } }; security_skb_classify_flow(skb, &fl); @@ -689,7 +479,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } -static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + int err = -1; + struct sk_buff *skb; + + /* First, grab a route. */ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, + ireq->rmt_addr); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + err = net_xmit_eval(err); + } + +out: + dst_release(dst); + return err; +} + +static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -698,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno; + u64 seqno = 0; /* Never send a reset in response to a reset. */ if (rxdh->dccph_type == DCCP_PKT_RESET) @@ -720,9 +540,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); /* Build DCCP header and checksum it. */ dh->dccph_type = DCCP_PKT_RESET; @@ -734,16 +552,15 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) DCCP_SKB_CB(rxskb)->dccpd_reset_code; /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - seqno = 0; if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, + rxskb->nh.iph->daddr); bh_lock_sock(dccp_v4_ctl_socket->sk); err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, @@ -751,7 +568,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) rxskb->nh.iph->saddr, NULL); bh_unlock_sock(dccp_v4_ctl_socket->sk); - if (err == NET_XMIT_CN || err == 0) { + if (net_xmit_eval(err) == 0) { DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); } @@ -759,6 +576,103 @@ out: dst_release(dst); } +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; + goto drop; + } + + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(&dccp_request_sock_ops); + if (req == NULL) + goto drop; + + if (dccp_parse_options(sk, skb)) + goto drop_and_free; + + dccp_reqsk_init(req, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + ireq = inet_rsk(req); + ireq->loc_addr = skb->nh.iph->daddr; + ireq->rmt_addr = skb->nh.iph->saddr; + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(skb); + dreq->dreq_service = service; + + if (dccp_v4_send_response(sk, req, NULL)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; + return -1; +} + +EXPORT_SYMBOL_GPL(dccp_v4_conn_request); + int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct dccp_hdr *dh = dccp_hdr(skb); @@ -771,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process @@ -811,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); discard: kfree_skb(skb); return 0; @@ -819,60 +732,74 @@ discard: EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); +/** + * dccp_invalid_packet - check for malformed packets + * Implements RFC 4340, 8.5: Step 1: Check header basics + * Packets that fail these checks are ignored and do not receive Resets. + */ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; + unsigned int cscov; if (skb->pkt_type != PACKET_HOST) return 1; + /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + DCCP_WARN("pskb_may_pull failed\n"); return 1; } dh = dccp_hdr(skb); - /* If the packet type is not understood, drop packet and return */ + /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); + DCCP_WARN("invalid packet type\n"); return 1; } /* - * If P.Data Offset is too small for packet type, or too large for - * packet, drop packet and return + * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 1\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); return 1; } - + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 2\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); return 1; } - dh = dccp_hdr(skb); - /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_x == 0 && - dh->dccph_type != DCCP_PKT_DATA && - dh->dccph_type != DCCP_PKT_ACK && - dh->dccph_type != DCCP_PKT_DATAACK) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " - "nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + if (dh->dccph_type >= DCCP_PKT_DATA && + dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } + /* + * If P.CsCov is too large for the packet size, drop packet and return. + * This must come _before_ checksumming (not as RFC 4340 suggests). + */ + cscov = dccp_csum_coverage(skb); + if (cscov > skb->len) { + DCCP_WARN("P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); + return 1; + } + + /* If header checksum is incorrect, drop packet and return. + * (This step is completed in the AF-dependent functions.) */ + skb->csum = skb_checksum(skb, 0, cscov, 0); + return 0; } @@ -883,17 +810,16 @@ static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", - __FUNCTION__); + /* Step 1: If header checksum is incorrect, drop packet and return */ + if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } @@ -915,23 +841,20 @@ static int dccp_v4_rcv(struct sk_buff *skb) dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", - (unsigned long long) + dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); - /* + /* * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -939,51 +862,61 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " - "do_time_wait\n"); - goto do_time_wait; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } + + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: "Such packets SHOULD be reported using Data Dropped + * options (Section 11.7) with Drop Code 0, Protocol + * Constraints." */ + goto discard_and_relse; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 1); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); } discard_it: - /* Discard frame. */ kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; } static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { @@ -1017,20 +950,6 @@ static int dccp_v4_init_sock(struct sock *sk) return err; } -static void dccp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); -} - -static struct request_sock_ops dccp_request_sock_ops = { - .family = PF_INET, - .obj_size = sizeof(struct dccp_request_sock), - .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_v4_reqsk_send_ack, - .destructor = dccp_v4_reqsk_destructor, - .send_reset = dccp_v4_ctl_send_reset, -}; - static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index eb0ff7ab05e..6b91a9dd041 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -36,13 +36,6 @@ /* Socket used for sending RSTs and ACKs */ static struct socket *dccp_v6_ctl_socket; -static void dccp_v6_ctl_send_reset(struct sk_buff *skb); -static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, - struct request_sock *req); -static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); - -static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); - static struct inet_connection_sock_af_ops dccp_ipv6_mapped; static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; @@ -65,205 +58,37 @@ static void dccp_v6_hash(struct sock *sk) } } -static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len, - struct in6_addr *saddr, - struct in6_addr *daddr, - unsigned long base) +/* add pseudo-header to DCCP checksum stored in skb->csum */ +static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, + struct in6_addr *saddr, + struct in6_addr *daddr) { - return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base); + return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } -static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +static inline void dccp_v6_send_check(struct sock *sk, int unused_value, + struct sk_buff *skb) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - if (skb->protocol == htons(ETH_P_IPV6)) - return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, - skb->nh.ipv6h->saddr.s6_addr32, - dh->dccph_dport, - dh->dccph_sport); + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_hdr *dh = dccp_hdr(skb); - return secure_dccp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, - dh->dccph_dport, - dh->dccph_sport); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) +static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport ) { - struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; - struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; - struct flowi fl; - struct dst_entry *dst; - int addr_type; - int err; - - dp->dccps_role = DCCP_ROLE_CLIENT; - - if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - - if (usin->sin6_family != AF_INET6) - return -EAFNOSUPPORT; - - memset(&fl, 0, sizeof(fl)); - - if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { - struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); - if (flowlabel == NULL) - return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); - fl6_sock_release(flowlabel); - } - } - /* - * connect() to INADDR_ANY means loopback (BSD'ism). - */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 1; - - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type & IPV6_ADDR_MULTICAST) - return -ENETUNREACH; - - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (addr_len >= sizeof(struct sockaddr_in6) && - usin->sin6_scope_id) { - /* If interface is set while binding, indices - * must coincide. - */ - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != usin->sin6_scope_id) - return -EINVAL; - - sk->sk_bound_dev_if = usin->sin6_scope_id; - } - - /* Connect to link-local address requires an interface */ - if (!sk->sk_bound_dev_if) - return -EINVAL; - } - - ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; - - /* - * DCCP over IPv4 - */ - if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = icsk->icsk_ext_hdr_len; - struct sockaddr_in sin; - - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - - if (__ipv6_only_sock(sk)) - return -ENETUNREACH; - - sin.sin_family = AF_INET; - sin.sin_port = usin->sin6_port; - sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - - icsk->icsk_af_ops = &dccp_ipv6_mapped; - sk->sk_backlog_rcv = dccp_v4_do_rcv; - - err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); - if (err) { - icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &dccp_ipv6_af_ops; - sk->sk_backlog_rcv = dccp_v6_do_rcv; - goto failure; - } else { - ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), - inet->saddr); - ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), - inet->rcv_saddr); - } - - return err; - } - - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; - - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; - security_sk_classify_flow(sk, &fl); - - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto failure; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = xfrm_lookup(&dst, &fl, sk, 0); - if (err < 0) - goto failure; - - if (saddr == NULL) { - saddr = &fl.fl6_src; - ipv6_addr_copy(&np->rcv_saddr, saddr); - } - - /* set the source address */ - ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; - - __ip6_dst_store(sk, dst, NULL, NULL); - - icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); - - inet->dport = usin->sin6_port; - - dccp_set_state(sk, DCCP_REQUESTING); - err = inet6_hash_connect(&dccp_death_row, sk); - if (err) - goto late_failure; - /* FIXME */ -#if 0 - dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, - inet->sport, - inet->dport); -#endif - err = dccp_connect(sk); - if (err) - goto late_failure; + return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); +} - return 0; +static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +{ + return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, + skb->nh.ipv6h->saddr.s6_addr32, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport ); -late_failure: - dccp_set_state(sk, DCCP_CLOSED); - __sk_dst_reset(sk); -failure: - inet->dport = 0; - sk->sk_route_caps = 0; - return err; } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -277,7 +102,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, skb->dev->ifindex); + &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -464,16 +289,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, if (skb != NULL) { struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v6_check(dh, skb->len, - &ireq6->loc_addr, - &ireq6->rmt_addr, - csum_partial((char *)dh, - skb->len, - skb->csum)); + dh->dccph_checksum = dccp_v6_csum_finish(skb, + &ireq6->loc_addr, + &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); - if (err == NET_XMIT_CN) - err = 0; + err = net_xmit_eval(err); } done: @@ -489,32 +310,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet6_rsk(req)->pktopts); } -static struct request_sock_ops dccp6_request_sock_ops = { - .family = AF_INET6, - .obj_size = sizeof(struct dccp6_request_sock), - .rtx_syn_ack = dccp_v6_send_response, - .send_ack = dccp_v6_reqsk_send_ack, - .destructor = dccp_v6_reqsk_destructor, - .send_reset = dccp_v6_ctl_send_reset, -}; - -static struct timewait_sock_ops dccp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct dccp6_timewait_sock), -}; - -static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr, - len, IPPROTO_DCCP, - csum_partial((char *)dh, - dh->dccph_doff << 2, - skb->csum)); -} - -static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + @@ -522,7 +318,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct flowi fl; - u64 seqno; + u64 seqno = 0; if (rxdh->dccph_type == DCCP_PKT_RESET) return; @@ -533,13 +329,11 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); if (skb == NULL) - return; + return; skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); /* Swap the send and the receive. */ dh->dccph_type = DCCP_PKT_RESET; @@ -551,20 +345,20 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) DCCP_SKB_CB(rxskb)->dccpd_reset_code; /* See "8.3.1. Abnormal Termination" in RFC 4340 */ - seqno = 0; if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, + &rxskb->nh.ipv6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); - dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - sizeof(*dh), IPPROTO_DCCP, - skb->csum); + fl.proto = IPPROTO_DCCP; fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; @@ -584,60 +378,14 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) kfree_skb(skb); } -static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, - struct request_sock *req) -{ - struct flowi fl; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, - GFP_ATOMIC); - if (skb == NULL) - return; - - skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); - - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); - - /* FIXME: calculate checksum, IPv4 also should... */ - - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dh->dccph_dport; - fl.fl_ip_sport = dh->dccph_sport; - security_req_classify_flow(req, &fl); - - if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - return; - } - } - - kfree_skb(skb); -} +static struct request_sock_ops dccp6_request_sock_ops = { + .family = AF_INET6, + .obj_size = sizeof(struct dccp6_request_sock), + .rtx_syn_ack = dccp_v6_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v6_reqsk_destructor, + .send_reset = dccp_v6_ctl_send_reset, +}; static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { @@ -672,12 +420,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock dp; struct request_sock *req; struct dccp_request_sock *dreq; struct inet6_request_sock *ireq6; struct ipv6_pinfo *np = inet6_sk(sk); - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -690,7 +437,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_bad_service_code(sk, service)) { reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; - } + } /* * There are no SYN attacks on IPv6, yet... */ @@ -704,9 +451,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - /* FIXME: process options */ + if (dccp_parse_options(sk, skb)) + goto drop_and_free; - dccp_openreq_init(req, &dp, skb); + dccp_reqsk_init(req, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -714,7 +462,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); - req->rcv_wnd = dccp_feat_default_sequence_window; ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || @@ -733,14 +480,14 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. */ dreq = dccp_rsk(req); dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v6_init_sequence(sk, skb); + dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_service = service; if (dccp_v6_send_response(sk, req, NULL)) @@ -990,18 +737,46 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) + /* + * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below + * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example. + */ opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) { - /* This is where we would goto ipv6_pktoptions. */ + /* XXX This is where we would goto ipv6_pktoptions. */ __kfree_skb(opt_skb); } return 0; } + /* + * Step 3: Process LISTEN state + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in + * dccp_rcv_state_process + */ if (sk->sk_state == DCCP_LISTEN) { struct sock *nsk = dccp_v6_hnd_req(sk, skb); @@ -1012,7 +787,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if (nsk != sk) { + if (nsk != sk) { if (dccp_child_process(sk, nsk, skb)) goto reset; if (opt_skb != NULL) @@ -1024,13 +799,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) { - /* This is where we would goto ipv6_pktoptions. */ + /* XXX This is where we would goto ipv6_pktoptions. */ __kfree_skb(opt_skb); } return 0; reset: - dccp_v6_ctl_send_reset(skb); + dccp_v6_ctl_send_reset(sk, skb); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); @@ -1043,12 +818,20 @@ static int dccp_v6_rcv(struct sk_buff **pskb) const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; + /* Step 1: If header checksum is incorrect, drop packet and return. */ + if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr)) { + DCCP_WARN("dropped packet with invalid checksum\n"); + goto discard_it; + } + dh = dccp_hdr(skb); DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); @@ -1060,63 +843,247 @@ static int dccp_v6_rcv(struct sk_buff **pskb) DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, dh->dccph_sport, &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); /* * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If no socket ... */ - if (sk == NULL) + if (sk == NULL) { + dccp_pr_debug("failed to look up flow ID in table and " + "get corresponding socket\n"); goto no_dccp_socket; + } /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) - goto do_time_wait; + if (sk->sk_state == DCCP_TIME_WAIT) { + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } + + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */ + goto discard_and_relse; + } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return sk_receive_skb(sk, skb) ? -1 : 0; + return sk_receive_skb(sk, skb, 1) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(skb); + dccp_v6_ctl_send_reset(sk, skb); } -discard_it: - - /* - * Discard frame - */ +discard_it: kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; +} -do_time_wait: - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; +static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct flowi fl; + struct dst_entry *dst; + int addr_type; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + if (usin->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + + memset(&fl, 0, sizeof(fl)); + + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl.fl6_flowlabel); + if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + struct ip6_flowlabel *flowlabel; + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + fl6_sock_release(flowlabel); + } + } + /* + * connect() to INADDR_ANY means loopback (BSD'ism). + */ + if (ipv6_addr_any(&usin->sin6_addr)) + usin->sin6_addr.s6_addr[15] = 1; + + addr_type = ipv6_addr_type(&usin->sin6_addr); + + if (addr_type & IPV6_ADDR_MULTICAST) + return -ENETUNREACH; + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (addr_len >= sizeof(struct sockaddr_in6) && + usin->sin6_scope_id) { + /* If interface is set while binding, indices + * must coincide. + */ + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != usin->sin6_scope_id) + return -EINVAL; + + sk->sk_bound_dev_if = usin->sin6_scope_id; + } + + /* Connect to link-local address requires an interface */ + if (!sk->sk_bound_dev_if) + return -EINVAL; + } + + ipv6_addr_copy(&np->daddr, &usin->sin6_addr); + np->flow_label = fl.fl6_flowlabel; + + /* + * DCCP over IPv4 + */ + if (addr_type == IPV6_ADDR_MAPPED) { + u32 exthdrlen = icsk->icsk_ext_hdr_len; + struct sockaddr_in sin; + + SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + + if (__ipv6_only_sock(sk)) + return -ENETUNREACH; + + sin.sin_family = AF_INET; + sin.sin_port = usin->sin6_port; + sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; + + icsk->icsk_af_ops = &dccp_ipv6_mapped; + sk->sk_backlog_rcv = dccp_v4_do_rcv; + + err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + if (err) { + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_backlog_rcv = dccp_v6_do_rcv; + goto failure; + } else { + ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), + inet->saddr); + ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), + inet->rcv_saddr); + } + + return err; + } + + if (!ipv6_addr_any(&np->rcv_saddr)) + saddr = &np->rcv_saddr; + + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = usin->sin6_port; + fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); + + if (np->opt != NULL && np->opt->srcrt != NULL) { + const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto failure; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + err = xfrm_lookup(&dst, &fl, sk, 0); + if (err < 0) + goto failure; + + if (saddr == NULL) { + saddr = &fl.fl6_src; + ipv6_addr_copy(&np->rcv_saddr, saddr); + } + + /* set the source address */ + ipv6_addr_copy(&np->saddr, saddr); + inet->rcv_saddr = LOOPBACK4_IPV6; + + __ip6_dst_store(sk, dst, NULL, NULL); + + icsk->icsk_ext_hdr_len = 0; + if (np->opt != NULL) + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); + + inet->dport = usin->sin6_port; + + dccp_set_state(sk, DCCP_REQUESTING); + err = inet6_hash_connect(&dccp_death_row, sk); + if (err) + goto late_failure; + + dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, + np->daddr.s6_addr32, + inet->sport, inet->dport); + err = dccp_connect(sk); + if (err) + goto late_failure; + + return 0; + +late_failure: + dccp_set_state(sk, DCCP_CLOSED); + __sk_dst_reset(sk); +failure: + inet->dport = 0; + sk->sk_route_caps = 0; + return err; } static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { @@ -1179,6 +1146,10 @@ static int dccp_v6_destroy_sock(struct sock *sk) return inet6_destroy_sock(sk); } +static struct timewait_sock_ops dccp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct dccp6_timewait_sock), +}; + static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 9045438d6b3..6656bb497c7 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -11,6 +11,7 @@ */ #include <linux/dccp.h> +#include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/timer.h> @@ -31,8 +32,7 @@ struct inet_timewait_death_row dccp_death_row = { .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&dccp_death_row), .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, - inet_twdr_twkill_work, - &dccp_death_row), + inet_twdr_twkill_work), /* Short-time timewait calendar */ .twcal_hand = -1, @@ -83,8 +83,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket " - "table overflow\n"); + DCCP_WARN("time wait bucket table overflow\n"); } dccp_done(sk); @@ -97,8 +96,8 @@ struct sock *dccp_create_openreq_child(struct sock *sk, /* * Step 3: Process LISTEN state * - * // Generate a new socket and switch to that socket - * Set S := new socket for this port pair + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair */ struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); @@ -147,9 +146,9 @@ out_free: /* * Step 3: Process LISTEN state * - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init - * Cookie + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies */ /* See dccp_v4_conn_request */ @@ -183,7 +182,7 @@ out_free: EXPORT_SYMBOL_GPL(dccp_create_openreq_child); -/* +/* * Process an incoming packet for RESPOND sockets represented * as an request_sock. */ @@ -195,15 +194,17 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, - dccp_rsk(req)->dreq_isr)) { - struct dccp_request_sock *dreq = dccp_rsk(req); + struct dccp_request_sock *dreq = dccp_rsk(req); + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { dccp_pr_debug("Retransmitted REQUEST\n"); - /* Send another RESPONSE packet */ - dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); - dccp_set_seqno(&dreq->dreq_isr, - DCCP_SKB_CB(skb)->dccpd_seq); + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + /* + * Send another RESPONSE packet + * To protect against Request floods, increment retrans + * counter (backoff, monitored by dccp_response_timer). + */ + req->retrans++; req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ @@ -243,7 +244,7 @@ listen_overflow: DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(skb); + req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req, prev); goto out; @@ -283,3 +284,19 @@ int dccp_child_process(struct sock *parent, struct sock *child, } EXPORT_SYMBOL_GPL(dccp_child_process); + +void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk) +{ + DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); +} + +EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); + +void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) +{ + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->acked = 0; + req->rcv_wnd = sysctl_dccp_feat_sequence_window; +} + +EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c index fb0db1f7cd7..c03ba61eb6d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -22,23 +22,23 @@ #include "dccp.h" #include "feat.h" -int dccp_feat_default_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; -int dccp_feat_default_rx_ccid = DCCPF_INITIAL_CCID; -int dccp_feat_default_tx_ccid = DCCPF_INITIAL_CCID; -int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO; -int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; -int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; +int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; +int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; +int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; +int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window); +EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window); void dccp_minisock_init(struct dccp_minisock *dmsk) { - dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window; - dmsk->dccpms_rx_ccid = dccp_feat_default_rx_ccid; - dmsk->dccpms_tx_ccid = dccp_feat_default_tx_ccid; - dmsk->dccpms_ack_ratio = dccp_feat_default_ack_ratio; - dmsk->dccpms_send_ack_vector = dccp_feat_default_send_ack_vector; - dmsk->dccpms_send_ndp_count = dccp_feat_default_send_ndp_count; + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) @@ -60,12 +60,9 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) int dccp_parse_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx opt: " : "server rx opt: "; -#endif const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -119,7 +116,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%sNDP count=%d\n", debug_prefix, + dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk), opt_recv->dccpor_ndp); break; case DCCPO_CHANGE_L: @@ -153,7 +150,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) break; if (dccp_msk(sk)->dccpms_send_ack_vector && - dccp_ackvec_parse(sk, skb, opt, value, len)) + dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) goto out_invalid_option; break; case DCCPO_TIMESTAMP: @@ -165,8 +162,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; dccp_timestamp(sk, &dp->dccps_timestamp_time); - dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", - debug_prefix, opt_recv->dccpor_timestamp, + dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", + dccp_role(sk), opt_recv->dccpor_timestamp, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; @@ -176,8 +173,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", - debug_prefix, + dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " + "ackno=%llu, ", dccp_role(sk), opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) @@ -211,8 +208,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (elapsed_time > opt_recv->dccpor_elapsed_time) opt_recv->dccpor_elapsed_time = elapsed_time; - dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - elapsed_time); + dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", + dccp_role(sk), elapsed_time); break; /* * From RFC 4340, sec. 10.3: @@ -242,9 +239,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) } break; default: - pr_info("DCCP(%p): option %d(len=%d) not " - "implemented, ignoring\n", - sk, opt, len); + DCCP_CRIT("DCCP(%p): option %d(len=%d) not " + "implemented, ignoring", sk, opt, len); break; } @@ -261,7 +257,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; - pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); + DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); return -1; } @@ -451,8 +447,7 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small" - " to insert feature %d option!\n", feat); + DCCP_WARN("packet too small for feature %d option!\n", feat); return -1; } @@ -465,8 +460,10 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, if (len) memcpy(to, val, len); - dccp_pr_debug("option %d feat %d len %d\n", type, feat, len); + dccp_pr_debug("%s(%s (%d), ...), length %d\n", + dccp_feat_typename(type), + dccp_feat_name(feat), feat, len); return 0; } @@ -560,11 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) return -1; dp->dccps_hc_rx_insert_options = 0; } - if (dp->dccps_hc_tx_insert_options) { - if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb)) - return -1; - dp->dccps_hc_tx_insert_options = 0; - } /* Feature negotiation */ /* Data packets can't do feat negotiation */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 7102e3aed4c..82456965908 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -1,6 +1,6 @@ /* * net/dccp/output.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -88,16 +88,15 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) return -EPROTO; } - skb->h.raw = skb_push(skb, dccp_header_size); - dh = dccp_hdr(skb); /* Build DCCP header and checksum it. */ - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->sport; dh->dccph_dport = inet->dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; + dh->dccph_cscov = dp->dccps_pcslen; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; @@ -117,7 +116,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - icsk->icsk_af_ops->send_check(sk, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, 0, skb); if (set_ack) dccp_event_ack_sent(sk); @@ -125,17 +124,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); - if (err <= 0) - return err; - - /* NET_XMIT_CN is special. It does not guarantee, - * that this packet is lost. It tells that device - * is about to start to drop packets or already - * drops some packets of the same priority and - * invokes us to send less aggressively. - */ - return err == NET_XMIT_CN ? 0 : err; + err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); + return net_xmit_eval(err); } return -ENOBUFS; } @@ -185,14 +175,12 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for - * @timeo: for how long */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, - long *timeo) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - long delay; + unsigned long delay; int rc; while (1) { @@ -200,22 +188,16 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (sk->sk_err) goto do_error; - if (!*timeo) - goto do_nonblock; if (signal_pending(current)) goto do_interrupted; - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (rc <= 0) break; delay = msecs_to_jiffies(rc); - if (delay > *timeo || delay < 0) - goto do_nonblock; - sk->sk_write_pending++; release_sock(sk); - *timeo -= schedule_timeout(delay); + schedule_timeout(delay); lock_sock(sk); sk->sk_write_pending--; } @@ -226,11 +208,8 @@ out: do_error: rc = -EPIPE; goto out; -do_nonblock: - rc = -EAGAIN; - goto out; do_interrupted: - rc = sock_intr_errno(*timeo); + rc = -EINTR; goto out; } @@ -251,12 +230,9 @@ void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long timeo = 30000; /* If a packet is taking longer than 2 secs - we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (err > 0) { if (!block) { @@ -264,12 +240,9 @@ void dccp_write_xmit(struct sock *sk, int block) msecs_to_jiffies(err)+jiffies); break; } else - err = dccp_wait_for_ccid(sk, skb, &timeo); - if (err) { - printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" - " %d\n", __FUNCTION__, err); - dump_stack(); - } + err = dccp_wait_for_ccid(sk, skb); + if (err && err != -EINTR) + DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } skb_dequeue(&sk->sk_write_queue); @@ -291,14 +264,13 @@ void dccp_write_xmit(struct sock *sk, int block) err = dccp_transmit_skb(sk, skb); ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - if (err) { - printk(KERN_CRIT "%s:err from " - "ccid_hc_tx_packet_sent %d\n", - __FUNCTION__, err); - dump_stack(); - } - } else + if (err) + DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", + err); + } else { + dccp_pr_debug("packet discarded\n"); kfree(skb); + } } } @@ -329,9 +301,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; dreq = dccp_rsk(req); + if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ + dccp_inc_seqno(&dreq->dreq_iss); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; @@ -340,10 +313,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + /* Build and checksum header */ + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_rsk(req)->rmt_port; @@ -355,6 +326,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; + dccp_csum_outgoing(skb); + + /* We use `acked' to remember that a Response was already sent. */ + inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; } @@ -363,7 +338,6 @@ EXPORT_SYMBOL_GPL(dccp_make_response); static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, const enum dccp_reset_codes code) - { struct dccp_hdr *dh; struct dccp_sock *dp = dccp_sk(sk); @@ -379,7 +353,6 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; dccp_inc_seqno(&dp->dccps_gss); @@ -392,10 +365,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_sk(sk)->dport; @@ -407,7 +377,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb); + inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; @@ -426,9 +396,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) code); if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); - if (err == NET_XMIT_CN) - err = 0; + err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); + return net_xmit_eval(err); } } @@ -449,17 +418,21 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - dccp_update_gss(sk, dp->dccps_iss); - /* + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: * SWL := max(GSR + 1 - floor(W/4), ISR), * AWL := max(GSS - W' + 1, ISS). * These adjustments MUST be applied only at the beginning of the * connection. - */ + */ + dccp_update_gss(sk, dp->dccps_iss); dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); + /* S.GAR - greatest valid acknowledgement number received on a non-Sync; + * initialized to S.ISS (sec. 8.5) */ + dp->dccps_gar = dp->dccps_iss; + icsk->icsk_retransmits = 0; init_timer(&dp->dccps_xmit_timer); dp->dccps_xmit_timer.data = (unsigned long)sk; @@ -481,7 +454,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - skb->csum = 0; dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); @@ -513,7 +485,6 @@ void dccp_send_ack(struct sock *sk) /* Reserve space for headers */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; dccp_transmit_skb(sk, skb); } @@ -567,7 +538,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq, /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; @@ -593,7 +563,6 @@ void dccp_send_close(struct sock *sk, const int active) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 146496fce2e..f81e37de35d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -106,8 +106,10 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, } static struct jprobe dccp_send_probe = { - .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, }, - .entry = (kprobe_opcode_t *)&jdccp_sendmsg, + .kp = { + .symbol_name = "dccp_sendmsg", + }, + .entry = JPROBE_ENTRY(jdccp_sendmsg), }; static int dccpprobe_open(struct inode *inode, struct file *file) @@ -160,6 +162,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + if (IS_ERR(dccpw.fifo)) + return PTR_ERR(dccpw.fifo); if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) goto err0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 72cbdcfc2c6..63b3fa20e14 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -52,6 +52,9 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { EXPORT_SYMBOL_GPL(dccp_hashinfo); +/* the maximum queue length for tx in packets. 0 is no limit */ +int sysctl_dccp_tx_qlen __read_mostly = 5; + void dccp_set_state(struct sock *sk, const int state) { const int oldstate = sk->sk_state; @@ -193,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk, GFP_KERNEL); dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, sk, GFP_KERNEL); - if (unlikely(dp->dccps_hc_rx_ccid == NULL || + if (unlikely(dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL)) { ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); @@ -212,6 +215,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dccp_init_xmit_timers(sk); icsk->icsk_rto = DCCP_TIMEOUT_INIT; + icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; @@ -262,12 +266,12 @@ int dccp_destroy_sock(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_destroy_sock); -static inline int dccp_listen_start(struct sock *sk) +static inline int dccp_listen_start(struct sock *sk, int backlog) { struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); + return inet_csk_listen_start(sk, backlog); } int dccp_disconnect(struct sock *sk, int flags) @@ -386,7 +390,7 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; - if (service == DCCP_SERVICE_INVALID_VALUE || + if (service == DCCP_SERVICE_INVALID_VALUE || optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) return -EINVAL; @@ -451,9 +455,8 @@ out_free_val: static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct dccp_sock *dp; - int err; - int val; + struct dccp_sock *dp = dccp_sk(sk); + int val, err = 0; if (optlen < sizeof(int)) return -EINVAL; @@ -465,14 +468,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, return dccp_setsockopt_service(sk, val, optval, optlen); lock_sock(sk); - dp = dccp_sk(sk); - err = 0; - switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: - dp->dccps_packet_size = val; + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + err = 0; break; - case DCCP_SOCKOPT_CHANGE_L: if (optlen != sizeof(struct dccp_so_feat)) err = -EINVAL; @@ -481,7 +481,6 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, (struct dccp_so_feat __user *) optval); break; - case DCCP_SOCKOPT_CHANGE_R: if (optlen != sizeof(struct dccp_so_feat)) err = -EINVAL; @@ -490,12 +489,26 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, (struct dccp_so_feat __user *) optval); break; - + case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ + if (val < 0 || val > 15) + err = -EINVAL; + else + dp->dccps_pcslen = val; + break; + case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ + if (val < 0 || val > 15) + err = -EINVAL; + else { + dp->dccps_pcrlen = val; + /* FIXME: add feature negotiation, + * ChangeL(MinimumChecksumCoverage, val) */ + } + break; default: err = -ENOPROTOOPT; break; } - + release_sock(sk); return err; } @@ -569,12 +582,17 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: - val = dp->dccps_packet_size; - len = sizeof(dp->dccps_packet_size); - break; + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + return 0; case DCCP_SOCKOPT_SERVICE: return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); + case DCCP_SOCKOPT_SEND_CSCOV: + val = dp->dccps_pcslen; + break; + case DCCP_SOCKOPT_RECV_CSCOV: + val = dp->dccps_pcrlen; + break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -630,6 +648,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EMSGSIZE; lock_sock(sk); + + if (sysctl_dccp_tx_qlen && + (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + rc = -EAGAIN; + goto out_release; + } + timeo = sock_sndtimeo(sk, noblock); /* @@ -788,7 +813,7 @@ int inet_dccp_listen(struct socket *sock, int backlog) * FIXME: here it probably should be sk->sk_prot->listen_start * see tcp_listen_start */ - err = dccp_listen_start(sk); + err = dccp_listen_start(sk, backlog); if (err) goto out; } @@ -805,7 +830,7 @@ EXPORT_SYMBOL_GPL(inet_dccp_listen); static const unsigned char dccp_new_state[] = { /* current state: new state: action: */ [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_REQUESTING] = DCCP_CLOSED, [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_LISTEN] = DCCP_CLOSED, @@ -1008,8 +1033,7 @@ static int __init dccp_init(void) } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { - printk(KERN_CRIT "Failed to allocate DCCP " - "established hash table\n"); + DCCP_CRIT("Failed to allocate DCCP established hash table"); goto out_free_bind_bucket_cachep; } @@ -1031,7 +1055,7 @@ static int __init dccp_init(void) } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { - printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); + DCCP_CRIT("Failed to allocate DCCP bind hash table"); goto out_free_dccp_ehash; } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 38bc157876f..fdcfca3e920 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/sysctl.h> +#include "dccp.h" #include "feat.h" #ifndef CONFIG_SYSCTL @@ -19,53 +20,76 @@ static struct ctl_table dccp_default_table[] = { { - .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, .procname = "seq_window", - .data = &dccp_feat_default_sequence_window, - .maxlen = sizeof(dccp_feat_default_sequence_window), + .data = &sysctl_dccp_feat_sequence_window, + .maxlen = sizeof(sysctl_dccp_feat_sequence_window), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_RX_CCID, .procname = "rx_ccid", - .data = &dccp_feat_default_rx_ccid, - .maxlen = sizeof(dccp_feat_default_rx_ccid), + .data = &sysctl_dccp_feat_rx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_TX_CCID, .procname = "tx_ccid", - .data = &dccp_feat_default_tx_ccid, - .maxlen = sizeof(dccp_feat_default_tx_ccid), + .data = &sysctl_dccp_feat_tx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO, .procname = "ack_ratio", - .data = &dccp_feat_default_ack_ratio, - .maxlen = sizeof(dccp_feat_default_ack_ratio), + .data = &sysctl_dccp_feat_ack_ratio, + .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC, .procname = "send_ackvec", - .data = &dccp_feat_default_send_ack_vector, - .maxlen = sizeof(dccp_feat_default_send_ack_vector), + .data = &sysctl_dccp_feat_send_ack_vector, + .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_DCCP_DEFAULT_SEND_NDP, .procname = "send_ndp", - .data = &dccp_feat_default_send_ndp_count, - .maxlen = sizeof(dccp_feat_default_send_ndp_count), + .data = &sysctl_dccp_feat_send_ndp_count, + .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count), .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "request_retries", + .data = &sysctl_dccp_request_retries, + .maxlen = sizeof(sysctl_dccp_request_retries), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "retries1", + .data = &sysctl_dccp_retries1, + .maxlen = sizeof(sysctl_dccp_retries1), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "retries2", + .data = &sysctl_dccp_retries2, + .maxlen = sizeof(sysctl_dccp_retries2), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tx_qlen", + .data = &sysctl_dccp_tx_qlen, + .maxlen = sizeof(sysctl_dccp_tx_qlen), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .ctl_name = 0, } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8447742f561..e5348f369c6 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -1,6 +1,6 @@ /* * net/dccp/timer.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -15,15 +15,10 @@ #include "dccp.h" -static void dccp_write_timer(unsigned long data); -static void dccp_keepalive_timer(unsigned long data); -static void dccp_delack_timer(unsigned long data); - -void dccp_init_xmit_timers(struct sock *sk) -{ - inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, - &dccp_keepalive_timer); -} +/* sysctl variables governing numbers of retransmission attempts */ +int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES; +int sysctl_dccp_retries1 __read_mostly = TCP_RETR1; +int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; static void dccp_write_err(struct sock *sk) { @@ -44,11 +39,10 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) dst_negative_advice(&sk->sk_dst_cache); - retry_until = icsk->icsk_syn_retries ? : - /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + retry_until = icsk->icsk_syn_retries ? + : sysctl_dccp_request_retries; } else { - if (icsk->icsk_retransmits >= - /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + if (icsk->icsk_retransmits >= sysctl_dccp_retries1) { /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black hole detection. :-( @@ -72,7 +66,7 @@ static int dccp_write_timeout(struct sock *sk) dst_negative_advice(&sk->sk_dst_cache); } - retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; + retry_until = sysctl_dccp_retries2; /* * FIXME: see tcp_write_timout and tcp_out_of_resources */ @@ -86,53 +80,6 @@ static int dccp_write_timeout(struct sock *sk) return 0; } -/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ -static void dccp_delack_timer(unsigned long data) -{ - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, - jiffies + TCP_DELACK_MIN); - goto out; - } - - if (sk->sk_state == DCCP_CLOSED || - !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) - goto out; - if (time_after(icsk->icsk_ack.timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, - icsk->icsk_ack.timeout); - goto out; - } - - icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - - if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { - /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); - } else { - /* Delayed ACK missed: leave pingpong mode and - * deflate ATO. - */ - icsk->icsk_ack.pingpong = 0; - icsk->icsk_ack.ato = TCP_ATO_MIN; - } - dccp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} - /* * The DCCP retransmit timer. */ @@ -142,7 +89,7 @@ static void dccp_retransmit_timer(struct sock *sk) /* retransmit timer is used for feature negotiation throughout * connection. In this case, no packet is re-transmitted, but rather an - * ack is generated and pending changes are splaced into its options. + * ack is generated and pending changes are placed into its options. */ if (sk->sk_send_head == NULL) { dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); @@ -154,12 +101,14 @@ static void dccp_retransmit_timer(struct sock *sk) /* * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake - * (PARTOPEN timer), etc). - */ + * packet types. The only packets eligible for retransmission are: + * -- Requests in client-REQUEST state (sec. 8.1.1) + * -- Acks in client-PARTOPEN state (sec. 8.1.5) + * -- CloseReq in server-CLOSEREQ state (sec. 8.3) + * -- Close in node-CLOSING state (sec. 8.3) */ BUG_TRAP(sk->sk_send_head != NULL); - /* + /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ @@ -194,7 +143,7 @@ backoff: icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); - if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) + if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); out:; } @@ -251,7 +200,7 @@ static void dccp_keepalive_timer(unsigned long data) /* Only process if socket is not in use. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - /* Try again later. */ + /* Try again later. */ inet_csk_reset_keepalive_timer(sk, HZ / 20); goto out; } @@ -264,3 +213,56 @@ out: bh_unlock_sock(sk); sock_put(sk); } + +/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ +static void dccp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + icsk->icsk_ack.blocked = 1; + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + sk_reset_timer(sk, &icsk->icsk_delack_timer, + jiffies + TCP_DELACK_MIN); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, + icsk->icsk_ack.timeout); + goto out; + } + + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; + + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, + icsk->icsk_rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; + } + dccp_send_ack(sk); + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +void dccp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, + &dccp_keepalive_timer); +} diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 36e72cb145b..7914fd619c5 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -41,11 +41,3 @@ config DECNET_ROUTER See <file:Documentation/networking/decnet.txt> for more information. -config DECNET_ROUTE_FWMARK - bool "DECnet: use FWMARK value as routing key (EXPERIMENTAL)" - depends on DECNET_ROUTER && NETFILTER - help - If you say Y here, you will be able to specify different routes for - packets with different FWMARK ("firewalling mark") values - (see ipchains(8), "-m" argument). - diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3456cd33183..21f20f21dd3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -166,7 +166,7 @@ static struct hlist_head *dn_find_list(struct sock *sk) if (scp->addr.sdn_flags & SDF_WILD) return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL; - return &dn_sk_hash[scp->addrloc & DN_SK_HASH_MASK]; + return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK]; } /* @@ -180,7 +180,7 @@ static int check_port(__le16 port) if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[port & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -194,12 +194,12 @@ static unsigned short port_alloc(struct sock *sk) static unsigned short port = 0x2000; unsigned short i_port = port; - while(check_port(++port) != 0) { + while(check_port(dn_htons(++port)) != 0) { if (port == i_port) return 0; } - scp->addrloc = port; + scp->addrloc = dn_htons(port); return 1; } @@ -418,7 +418,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[cb->dst_port & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -1016,13 +1016,14 @@ static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc) static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) { - unsigned char *ptr = skb->data; - - opt->opt_optl = *ptr++; - opt->opt_status = 0; - memcpy(opt->opt_data, ptr, opt->opt_optl); - skb_pull(skb, dn_ntohs(opt->opt_optl) + 1); - + unsigned char *ptr = skb->data; + u16 len = *ptr++; /* yes, it's 8bit on the wire */ + + BUG_ON(len > 16); /* we've checked the contents earlier */ + opt->opt_optl = dn_htons(len); + opt->opt_status = 0; + memcpy(opt->opt_data, ptr, len); + skb_pull(skb, len + 1); } static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 01861feb608..fc6f3c023a5 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -38,7 +38,6 @@ #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/sysctl.h> #include <linux/notifier.h> #include <asm/uaccess.h> @@ -47,6 +46,7 @@ #include <net/dst.h> #include <net/flow.h> #include <net/fib_rules.h> +#include <net/netlink.h> #include <net/dn.h> #include <net/dn_dev.h> #include <net/dn_route.h> @@ -73,7 +73,7 @@ static BLOCKING_NOTIFIER_HEAD(dnaddr_chain); static struct dn_dev *dn_dev_create(struct net_device *dev, int *err); static void dn_dev_delete(struct net_device *dev); -static void rtmsg_ifa(int event, struct dn_ifaddr *ifa); +static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa); static int dn_eth_up(struct net_device *); static void dn_eth_down(struct net_device *); @@ -167,8 +167,7 @@ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context); + void __user *newval, size_t newlen); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; @@ -255,12 +254,10 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * struct dn_dev_sysctl_table *t; int i; - t = kmalloc(sizeof(*t), GFP_KERNEL); + t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) return; - memcpy(t, &dn_dev_sysctl, sizeof(*t)); - for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) { long offset = (long)t->dn_dev_vars[i].data; t->dn_dev_vars[i].data = ((char *)parms) + offset; @@ -349,8 +346,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { #ifdef CONFIG_DECNET_ROUTER struct net_device *dev = table->extra1; @@ -442,7 +438,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de } } - rtmsg_ifa(RTM_DELADDR, ifa1); + dn_ifaddr_notify(RTM_DELADDR, ifa1); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1); if (destroy) { dn_dev_free_ifa(ifa1); @@ -477,7 +473,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) ifa->ifa_next = dn_db->ifa_list; dn_db->ifa_list = ifa; - rtmsg_ifa(RTM_NEWADDR, ifa); + dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); return 0; @@ -647,41 +643,62 @@ static struct dn_dev *dn_dev_by_index(int ifindex) return dn_dev; } -static int dn_dev_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct nla_policy dn_ifa_policy[IFA_MAX+1] __read_mostly = { + [IFA_ADDRESS] = { .type = NLA_U16 }, + [IFA_LOCAL] = { .type = NLA_U16 }, + [IFA_LABEL] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, +}; + +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; struct dn_ifaddr *ifa, **ifap; + int err = -EADDRNOTAVAIL; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + if (err < 0) + goto errout; + ifm = nlmsg_data(nlh); if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) - return -EADDRNOTAVAIL; + goto errout; + + for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { + if (tb[IFA_LOCAL] && + nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) + continue; - for(ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) { - void *tmp = rta[IFA_LOCAL-1]; - if ((tmp && memcmp(RTA_DATA(tmp), &ifa->ifa_local, 2)) || - (rta[IFA_LABEL-1] && rtattr_strcmp(rta[IFA_LABEL-1], ifa->ifa_label))) + if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) continue; dn_dev_del_ifa(dn_db, ifap, 1); return 0; } - return -EADDRNOTAVAIL; +errout: + return err; } -static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; struct dn_dev *dn_db; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; struct dn_ifaddr *ifa; - int rv; + int err; - if (rta[IFA_LOCAL-1] == NULL) + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + if (err < 0) + return err; + + if (tb[IFA_LOCAL] == NULL) return -EINVAL; + ifm = nlmsg_data(nlh); if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) return -ENODEV; @@ -695,69 +712,77 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a if ((ifa = dn_dev_alloc_ifa()) == NULL) return -ENOBUFS; - if (!rta[IFA_ADDRESS - 1]) - rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; - memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 2); - memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 2); + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + + ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = dn_db; - if (rta[IFA_LABEL-1]) - rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL-1], IFNAMSIZ); + + if (tb[IFA_LABEL]) + nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - rv = dn_dev_insert_ifa(dn_db, ifa); - if (rv) + err = dn_dev_insert_ifa(dn_db, ifa); + if (err) dn_dev_free_ifa(ifa); - return rv; + + return err; } -static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) +static inline size_t dn_ifaddr_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + + nla_total_size(2) /* IFA_ADDRESS */ + + nla_total_size(2); /* IFA_LOCAL */ +} + +static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, + u32 pid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + ifm = nlmsg_data(nlh); ifm->ifa_family = AF_DECnet; ifm->ifa_prefixlen = 16; ifm->ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (ifa->ifa_address) - RTA_PUT(skb, IFA_ADDRESS, 2, &ifa->ifa_address); + NLA_PUT_LE16(skb, IFA_ADDRESS, ifa->ifa_address); if (ifa->ifa_local) - RTA_PUT(skb, IFA_LOCAL, 2, &ifa->ifa_local); + NLA_PUT_LE16(skb, IFA_LOCAL, ifa->ifa_local); if (ifa->ifa_label[0]) - RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); + + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) +static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) { struct sk_buff *skb; - int payload = sizeof(struct ifaddrmsg) + 128; int err = -ENOBUFS; - skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL); + skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0); + /* failure implies BUG in dn_ifaddr_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); errout: @@ -765,39 +790,43 @@ errout: rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); } -static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - int idx, dn_idx; - int s_idx, s_dn_idx; + int idx, dn_idx = 0, skip_ndevs, skip_naddr; struct net_device *dev; struct dn_dev *dn_db; struct dn_ifaddr *ifa; - s_idx = cb->args[0]; - s_dn_idx = dn_idx = cb->args[1]; + skip_ndevs = cb->args[0]; + skip_naddr = cb->args[1]; + read_lock(&dev_base_lock); - for(dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { - if (idx < s_idx) + for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + if (idx < skip_ndevs) continue; - if (idx > s_idx) - s_dn_idx = 0; + else if (idx > skip_ndevs) { + /* Only skip over addresses for first dev dumped + * in this iteration (idx == skip_ndevs) */ + skip_naddr = 0; + } + if ((dn_db = dev->dn_ptr) == NULL) continue; - for(ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { - if (dn_idx < s_dn_idx) + for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; + ifa = ifa->ifa_next, dn_idx++) { + if (dn_idx < skip_naddr) continue; - if (dn_dev_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWADDR, - NLM_F_MULTI) <= 0) + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR, + NLM_F_MULTI) < 0) goto done; } } done: read_unlock(&dev_base_lock); + cb->args[0] = idx; cb->args[1] = dn_idx; @@ -1414,9 +1443,9 @@ static struct file_operations dn_dev_seq_fops = { static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = { - [RTM_NEWADDR - RTM_BASE] = { .doit = dn_dev_rtm_newaddr, }, - [RTM_DELADDR - RTM_BASE] = { .doit = dn_dev_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_dev_dump_ifaddr, }, + [RTM_NEWADDR - RTM_BASE] = { .doit = dn_nl_newaddr, }, + [RTM_DELADDR - RTM_BASE] = { .doit = dn_nl_deladdr, }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_nl_dump_ifaddr, }, #ifdef CONFIG_DECNET_ROUTER [RTM_NEWROUTE - RTM_BASE] = { .doit = dn_fib_rtm_newroute, }, [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index ff0ebe99137..7322bb36e82 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -591,7 +591,6 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 72ecc6e62ec..39a6cf7fb56 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -360,9 +360,9 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) scp->max_window = decnet_no_fc_max_cwnd; if (skb->len > 0) { - unsigned char dlen = *skb->data; + u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = dn_htons((__u16)dlen); + scp->conndata_in.opt_optl = dn_htons(dlen); memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen); } } @@ -404,9 +404,9 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) memset(scp->discdata_in.opt_data, 0, 16); if (skb->len > 0) { - unsigned char dlen = *skb->data; + u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = dn_htons((__u16)dlen); + scp->discdata_in.opt_optl = dn_htons(dlen); memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen); } } @@ -804,7 +804,7 @@ got_it: goto free_out; } - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 0); } return dn_nsp_no_socket(skb, reason); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index c2e21cd89b3..b342e4e8f5f 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -526,7 +526,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) struct nsp_conn_init_msg *msg; __u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl); - if ((skb = dn_alloc_skb(sk, 50 + dn_ntohs(scp->conndata_out.opt_optl), gfp)) == NULL) + if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) return; msg = (struct nsp_conn_init_msg *)skb_put(skb, sizeof(*msg)); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 23489f7232d..9881933167b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -269,9 +269,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (fl1->nl_u.dn_u.fwmark ^ fl2->nl_u.dn_u.fwmark) | -#endif + (fl1->mark ^ fl2->mark) | (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; @@ -882,10 +880,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old { .daddr = oldflp->fld_dst, .saddr = oldflp->fld_src, .scope = RT_SCOPE_UNIVERSE, -#ifdef CONFIG_DECNET_ROUTE_FWMARK - .fwmark = oldflp->fld_fwmark -#endif } }, + .mark = oldflp->mark, .iif = loopback_dev.ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; @@ -903,7 +899,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->fld_fwmark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, loopback_dev.ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { @@ -1108,9 +1104,7 @@ make_route: rt->fl.fld_dst = oldflp->fld_dst; rt->fl.oif = oldflp->oif; rt->fl.iif = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - rt->fl.fld_fwmark = oldflp->fld_fwmark; -#endif + rt->fl.mark = oldflp->mark; rt->rt_saddr = fl.fld_src; rt->rt_daddr = fl.fld_dst; @@ -1178,9 +1172,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl rt = rcu_dereference(rt->u.rt_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (flp->fld_fwmark == rt->fl.fld_fwmark) && -#endif + (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { rt->u.dst.lastuse = jiffies; @@ -1235,10 +1227,8 @@ static int dn_route_input_slow(struct sk_buff *skb) { .daddr = cb->dst, .saddr = cb->src, .scope = RT_SCOPE_UNIVERSE, -#ifdef CONFIG_DECNET_ROUTE_FWMARK - .fwmark = skb->nfmark -#endif } }, + .mark = skb->mark, .iif = skb->dev->ifindex }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; @@ -1385,7 +1375,7 @@ make_route: rt->fl.fld_dst = cb->dst; rt->fl.oif = 0; rt->fl.iif = in_dev->ifindex; - rt->fl.fld_fwmark = fl.fld_fwmark; + rt->fl.mark = fl.mark; rt->u.dst.flags = DST_HOST; rt->u.dst.neighbour = neigh; @@ -1457,9 +1447,7 @@ int dn_route_input(struct sk_buff *skb) if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && (rt->fl.oif == 0) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (rt->fl.fld_fwmark == skb->nfmark) && -#endif + (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { rt->u.dst.lastuse = jiffies; dst_hold(&rt->u.dst); @@ -1481,7 +1469,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct rtmsg *r; struct nlmsghdr *nlh; unsigned char *b = skb->tail; - struct rta_cacheinfo ci; + long expires; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); r = NLMSG_DATA(nlh); @@ -1514,16 +1502,10 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) goto rtattr_failure; - ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); - ci.rta_used = rt->u.dst.__use; - ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); - if (rt->u.dst.expires) - ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies); - else - ci.rta_expires = 0; - ci.rta_error = rt->u.dst.error; - ci.rta_id = ci.rta_ts = ci.rta_tsage = 0; - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, + rt->u.dst.error) < 0) + goto rtattr_failure; if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); @@ -1604,8 +1586,6 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 3e0c882c90b..e32d0c3d5a9 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -45,10 +45,6 @@ struct dn_fib_rule __le16 dstmask; __le16 srcmap; u8 flags; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; -#endif }; static struct dn_fib_rule default_rule = { @@ -112,30 +108,21 @@ errout: } static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, + FRA_GENERIC_POLICY, [FRA_SRC] = { .type = NLA_U16 }, [FRA_DST] = { .type = NLA_U16 }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, }; static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - u16 daddr = fl->fld_dst; - u16 saddr = fl->fld_src; + __le16 daddr = fl->fld_dst; + __le16 saddr = fl->fld_src; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) return 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask) - return 0; -#endif - return 1; } @@ -169,20 +156,6 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_DST]) r->dst = nla_get_u16(tb[FRA_DST]); -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - r->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (r->fwmark) - /* compatibility: if the mark value is non-zero all bits - * are compared unless a mask is explicitly specified. - */ - r->fwmask = 0xFFFFFFFF; - } - - if (tb[FRA_FWMASK]) - r->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif - r->src_len = frh->src_len; r->srcmask = dnet_make_mask(r->src_len); r->dst_len = frh->dst_len; @@ -203,14 +176,6 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (r->dst_len != frh->dst_len)) return 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; -#endif - if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) return 0; @@ -248,12 +213,6 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->src_len = r->src_len; frh->tos = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); - if (r->fwmask || r->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask); -#endif if (r->dst_len) NLA_PUT_U16(skb, FRA_DST, r->dst); if (r->src_len) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 317904bb589..13b2421991b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -static kmem_cache_t *dn_hash_kmem __read_mostly; +static struct kmem_cache *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) @@ -263,6 +263,32 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern return 0; } +static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) +{ + size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(4) /* RTA_TABLE */ + + nla_total_size(2) /* RTA_DST */ + + nla_total_size(4); /* RTA_PRIORITY */ + + /* space for nested metrics */ + payload += nla_total_size((RTAX_MAX * nla_total_size(4))); + + if (fi->fib_nhs) { + /* Also handles the special case fib_nhs == 1 */ + + /* each nexthop is packed in an attribute */ + size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); + + /* may contain a gateway attribute */ + nhsize += nla_total_size(4); + + /* all nexthops are packed in a nested attribute */ + payload += nla_total_size(fi->fib_nhs * nhsize); + } + + return payload; +} + static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) @@ -335,17 +361,15 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, u32 pid = req ? req->pid : 0; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in dn_fib_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: @@ -566,7 +590,7 @@ create: replace: err = -ENOBUFS; - new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out; @@ -807,10 +831,11 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) printk(KERN_DEBUG "DECnet: BUG! Attempt to create routing table from interrupt\n"); return NULL; } - if ((t = kmalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), GFP_KERNEL)) == NULL) - return NULL; - memset(t, 0, sizeof(struct dn_fib_table)); + t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), + GFP_KERNEL); + if (t == NULL) + return NULL; t->n = n; t->insert = dn_fib_table_insert; @@ -818,7 +843,6 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) t->lookup = dn_fib_table_lookup; t->flush = dn_fib_table_flush; t->dump = dn_fib_table_dump; - memset(t->data, 0, sizeof(struct dn_hash)); hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); return t; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index e246f054f36..a4065eb1341 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str) static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; __le16 addr; @@ -220,8 +219,7 @@ static int dn_node_address_handler(ctl_table *table, int write, static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; struct net_device *dev; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 4bd78c8cfb2..2d31bf3f05c 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -60,7 +60,6 @@ #include <net/ip.h> #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/checksum.h> __setup("ether=", netdev_boot_setup); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 4200ec50986..fc1f99a5973 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -16,6 +16,7 @@ #include <linux/random.h> #include <linux/skbuff.h> #include <linux/netdevice.h> +#include <linux/mm.h> #include <linux/if_ether.h> #include <linux/if_arp.h> #include <asm/string.h> diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 1b2efff11d3..7a95c3d8131 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/skbuff.h> +#include <linux/mm.h> #include <asm/string.h> #include <net/ieee80211.h> diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 13b1e5fff7e..b1c6d1f717d 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -67,7 +67,7 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee) return 0; ieee->networks = - kmalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), + kzalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), GFP_KERNEL); if (!ieee->networks) { printk(KERN_WARNING "%s: Out of memory allocating beacons\n", @@ -75,9 +75,6 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee) return -ENOMEM; } - memset(ieee->networks, 0, - MAX_NETWORK_COUNT * sizeof(struct ieee80211_network)); - return 0; } @@ -118,6 +115,21 @@ static void ieee80211_networks_initialize(struct ieee80211_device *ieee) &ieee->network_free_list); } +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > IEEE80211_DATA_LEN)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static struct net_device_stats *ieee80211_generic_get_stats( + struct net_device *dev) +{ + struct ieee80211_device *ieee = netdev_priv(dev); + return &ieee->stats; +} + struct net_device *alloc_ieee80211(int sizeof_priv) { struct ieee80211_device *ieee; @@ -133,6 +145,11 @@ struct net_device *alloc_ieee80211(int sizeof_priv) } ieee = netdev_priv(dev); dev->hard_start_xmit = ieee80211_xmit; + dev->change_mtu = ieee80211_change_mtu; + + /* Drivers are free to override this if the generic implementation + * does not meet their needs. */ + dev->get_stats = ieee80211_generic_get_stats; ieee->dev = dev; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 770704183a1..d97e5412e31 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -415,17 +415,16 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, ieee->host_mc_decrypt : ieee->host_decrypt; if (can_be_decrypted) { - int idx = 0; if (skb->len >= hdrlen + 3) { /* Top two-bits of byte 3 are the key index */ - idx = skb->data[hdrlen + 3] >> 6; + keyidx = skb->data[hdrlen + 3] >> 6; } - /* ieee->crypt[] is WEP_KEY (4) in length. Given that idx - * is only allowed 2-bits of storage, no value of idx can - * be provided via above code that would result in idx + /* ieee->crypt[] is WEP_KEY (4) in length. Given that keyidx + * is only allowed 2-bits of storage, no value of keyidx can + * be provided via above code that would result in keyidx * being out of range */ - crypt = ieee->crypt[idx]; + crypt = ieee->crypt[keyidx]; #ifdef NOT_YET sta = NULL; @@ -479,6 +478,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, goto rx_exit; } #endif + /* drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.29) */ + if (sc == ieee->prev_seq_ctl) + goto rx_dropped; + else + ieee->prev_seq_ctl = sc; /* Data frame - extract src/dst addresses */ if (skb->len < IEEE80211_3ADDR_LEN) @@ -655,6 +659,51 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, goto rx_dropped; } + /* If the frame was decrypted in hardware, we may need to strip off + * any security data (IV, ICV, etc) that was left behind */ + if (!can_be_decrypted && (fc & IEEE80211_FCTL_PROTECTED) && + ieee->host_strip_iv_icv) { + int trimlen = 0; + + /* Top two-bits of byte 3 are the key index */ + if (skb->len >= hdrlen + 3) + keyidx = skb->data[hdrlen + 3] >> 6; + + /* To strip off any security data which appears before the + * payload, we simply increase hdrlen (as the header gets + * chopped off immediately below). For the security data which + * appears after the payload, we use skb_trim. */ + + switch (ieee->sec.encode_alg[keyidx]) { + case SEC_ALG_WEP: + /* 4 byte IV */ + hdrlen += 4; + /* 4 byte ICV */ + trimlen = 4; + break; + case SEC_ALG_TKIP: + /* 4 byte IV, 4 byte ExtIV */ + hdrlen += 8; + /* 8 byte MIC, 4 byte ICV */ + trimlen = 12; + break; + case SEC_ALG_CCMP: + /* 8 byte CCMP header */ + hdrlen += 8; + /* 8 byte MIC */ + trimlen = 8; + break; + } + + if (skb->len < trimlen) + goto rx_dropped; + + __skb_trim(skb, skb->len - trimlen); + + if (skb->len < hdrlen) + goto rx_dropped; + } + /* skb: hdr + (possible reassembled) full plaintext payload */ payload = skb->data + hdrlen; @@ -1078,12 +1127,12 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element while (length >= sizeof(*info_element)) { if (sizeof(*info_element) + info_element->len > length) { - IEEE80211_ERROR("Info elem: parse failed: " - "info_element->len + 2 > left : " - "info_element->len+2=%zd left=%d, id=%d.\n", - info_element->len + - sizeof(*info_element), - length, info_element->id); + IEEE80211_DEBUG_MGMT("Info elem: parse failed: " + "info_element->len + 2 > left : " + "info_element->len+2=%zd left=%d, id=%d.\n", + info_element->len + + sizeof(*info_element), + length, info_element->id); /* We stop processing but don't return an error here * because some misbehaviour APs break this rule. ie. * Orinoco AP1000. */ @@ -1255,12 +1304,11 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element case MFIE_TYPE_IBSS_DFS: if (network->ibss_dfs) break; - network->ibss_dfs = - kmalloc(info_element->len, GFP_ATOMIC); + network->ibss_dfs = kmemdup(info_element->data, + info_element->len, + GFP_ATOMIC); if (!network->ibss_dfs) return 1; - memcpy(network->ibss_dfs, info_element->data, - info_element->len); network->flags |= NETWORK_HAS_IBSS_DFS; break; diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index ae254497ba3..854fc13cd78 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -390,7 +390,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) * this stack is providing the full 802.11 header, one will * eventually be affixed to this fragment -- so we must account * for it when determining the amount of payload space. */ - bytes_per_frag = frag_size - IEEE80211_3ADDR_LEN; + bytes_per_frag = frag_size - hdr_len; if (ieee->config & (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) bytes_per_frag -= IEEE80211_FCS_LEN; @@ -412,7 +412,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) } else { nr_frags = 1; bytes_per_frag = bytes_last_frag = bytes; - frag_size = bytes + IEEE80211_3ADDR_LEN; + frag_size = bytes + hdr_len; } rts_required = (frag_size > ieee->rts diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index cf51c87a971..e3f37fdda65 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -58,9 +58,11 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft } void -ieee80211softmac_assoc_timeout(void *d) +ieee80211softmac_assoc_timeout(struct work_struct *work) { - struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; + struct ieee80211softmac_device *mac = + container_of(work, struct ieee80211softmac_device, + associnfo.timeout.work); struct ieee80211softmac_network *n; mutex_lock(&mac->associnfo.mutex); @@ -186,9 +188,11 @@ ieee80211softmac_assoc_notify_auth(struct net_device *dev, int event_type, void /* This function is called to handle userspace requests (asynchronously) */ void -ieee80211softmac_assoc_work(void *d) +ieee80211softmac_assoc_work(struct work_struct *work) { - struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; + struct ieee80211softmac_device *mac = + container_of(work, struct ieee80211softmac_device, + associnfo.work.work); struct ieee80211softmac_network *found = NULL; struct ieee80211_network *net = NULL, *best = NULL; int bssvalid; @@ -412,7 +416,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, network->authenticated = 0; /* we don't want to do this more than once ... */ network->auth_desynced_once = 1; - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); break; } default: @@ -427,6 +431,17 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, return 0; } +void +ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) +{ + unsigned long flags; + + spin_lock_irqsave(&mac->lock, flags); + mac->associnfo.associating = 1; + schedule_delayed_work(&mac->associnfo.work, 0); + spin_unlock_irqrestore(&mac->lock, flags); +} + int ieee80211softmac_handle_disassoc(struct net_device * dev, struct ieee80211_disassoc *disassoc) @@ -445,8 +460,7 @@ ieee80211softmac_handle_disassoc(struct net_device * dev, dprintk(KERN_INFO PFX "got disassoc frame\n"); ieee80211softmac_disassoc(mac); - /* try to reassociate */ - schedule_work(&mac->associnfo.work); + ieee80211softmac_try_reassoc(mac); return 0; } @@ -466,7 +480,7 @@ ieee80211softmac_handle_reassoc_req(struct net_device * dev, dprintkl(KERN_INFO PFX "reassoc request from unknown network\n"); return 0; } - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 4cef39e171d..8ed3e59b802 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -26,7 +26,7 @@ #include "ieee80211softmac_priv.h" -static void ieee80211softmac_auth_queue(void *data); +static void ieee80211softmac_auth_queue(struct work_struct *work); /* Queues an auth request to the desired AP */ int @@ -54,14 +54,14 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, auth->mac = mac; auth->retry = IEEE80211SOFTMAC_AUTH_RETRY_LIMIT; auth->state = IEEE80211SOFTMAC_AUTH_OPEN_REQUEST; - INIT_WORK(&auth->work, &ieee80211softmac_auth_queue, (void *)auth); + INIT_DELAYED_WORK(&auth->work, ieee80211softmac_auth_queue); /* Lock (for list) */ spin_lock_irqsave(&mac->lock, flags); /* add to list */ list_add_tail(&auth->list, &mac->auth_queue); - schedule_work(&auth->work); + schedule_delayed_work(&auth->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; @@ -70,14 +70,15 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, /* Sends an auth request to the desired AP and handles timeouts */ static void -ieee80211softmac_auth_queue(void *data) +ieee80211softmac_auth_queue(struct work_struct *work) { struct ieee80211softmac_device *mac; struct ieee80211softmac_auth_queue_item *auth; struct ieee80211softmac_network *net; unsigned long flags; - auth = (struct ieee80211softmac_auth_queue_item *)data; + auth = container_of(work, struct ieee80211softmac_auth_queue_item, + work.work); net = auth->net; mac = auth->mac; @@ -118,9 +119,11 @@ ieee80211softmac_auth_queue(void *data) /* Sends a response to an auth challenge (for shared key auth). */ static void -ieee80211softmac_auth_challenge_response(void *_aq) +ieee80211softmac_auth_challenge_response(struct work_struct *work) { - struct ieee80211softmac_auth_queue_item *aq = _aq; + struct ieee80211softmac_auth_queue_item *aq = + container_of(work, struct ieee80211softmac_auth_queue_item, + work.work); /* Send our response */ ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); @@ -158,7 +161,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Make sure that we've got an auth queue item for this request */ if(aq == NULL) { - printkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but no queue item exists.\n", MAC_ARG(auth->header.addr2)); /* Error #? */ return -1; } @@ -166,7 +169,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) /* Check for out of order authentication */ if(!net->authenticating) { - printkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2)); + dprintkl(KERN_DEBUG PFX "Authentication response received from "MAC_FMT" but did not request authentication.\n",MAC_ARG(auth->header.addr2)); return -1; } @@ -216,10 +219,16 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) net->challenge_len = *data++; if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) net->challenge_len = WLAN_AUTH_CHALLENGE_LEN; - if (net->challenge != NULL) - kfree(net->challenge); - net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC); - memcpy(net->challenge, data, net->challenge_len); + kfree(net->challenge); + net->challenge = kmemdup(data, net->challenge_len, + GFP_ATOMIC); + if (net->challenge == NULL) { + printkl(KERN_NOTICE PFX "Shared Key " + "Authentication failed due to " + "memory shortage.\n"); + spin_unlock_irqrestore(&mac->lock, flags); + break; + } aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE; /* We reuse the work struct from the auth request here. @@ -228,8 +237,8 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) * we have obviously already sent the initial auth * request. */ cancel_delayed_work(&aq->work); - INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq); - schedule_work(&aq->work); + INIT_DELAYED_WORK(&aq->work, &ieee80211softmac_auth_challenge_response); + schedule_delayed_work(&aq->work, 0); spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: @@ -328,6 +337,8 @@ ieee80211softmac_deauth_from_net(struct ieee80211softmac_device *mac, /* can't transmit data right now... */ netif_carrier_off(mac->dev); spin_unlock_irqrestore(&mac->lock, flags); + + ieee80211softmac_try_reassoc(mac); } /* @@ -342,7 +353,7 @@ ieee80211softmac_deauth_req(struct ieee80211softmac_device *mac, /* Make sure the network is authenticated */ if (!net->authenticated) { - printkl(KERN_DEBUG PFX "Can't send deauthentication packet, network is not authenticated.\n"); + dprintkl(KERN_DEBUG PFX "Can't send deauthentication packet, network is not authenticated.\n"); /* Error okay? */ return -EPERM; } @@ -376,7 +387,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de net = ieee80211softmac_get_network_by_bssid(mac, deauth->header.addr2); if (net == NULL) { - printkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n", + dprintkl(KERN_DEBUG PFX "Received deauthentication packet from "MAC_FMT", but that network is unknown.\n", MAC_ARG(deauth->header.addr2)); return 0; } @@ -384,7 +395,7 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de /* Make sure the network is authenticated */ if(!net->authenticated) { - printkl(KERN_DEBUG PFX "Can't perform deauthentication, network is not authenticated.\n"); + dprintkl(KERN_DEBUG PFX "Can't perform deauthentication, network is not authenticated.\n"); /* Error okay? */ return -EPERM; } @@ -392,6 +403,6 @@ ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *de ieee80211softmac_deauth_from_net(mac, net); /* let's try to re-associate */ - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c index f34fa2ef666..b9015656cfb 100644 --- a/net/ieee80211/softmac/ieee80211softmac_event.c +++ b/net/ieee80211/softmac/ieee80211softmac_event.c @@ -73,10 +73,12 @@ static char *event_descriptions[IEEE80211SOFTMAC_EVENT_LAST+1] = { static void -ieee80211softmac_notify_callback(void *d) +ieee80211softmac_notify_callback(struct work_struct *work) { - struct ieee80211softmac_event event = *(struct ieee80211softmac_event*) d; - kfree(d); + struct ieee80211softmac_event *pevent = + container_of(work, struct ieee80211softmac_event, work.work); + struct ieee80211softmac_event event = *pevent; + kfree(pevent); event.fun(event.mac->dev, event.event_type, event.context); } @@ -99,7 +101,7 @@ ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac, return -ENOMEM; eventptr->event_type = event; - INIT_WORK(&eventptr->work, ieee80211softmac_notify_callback, eventptr); + INIT_DELAYED_WORK(&eventptr->work, ieee80211softmac_notify_callback); eventptr->fun = fun; eventptr->context = context; eventptr->mac = mac; @@ -170,7 +172,7 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve /* User may have subscribed to ANY event, so * we tell them which event triggered it. */ eventptr->event_type = event; - schedule_work(&eventptr->work); + schedule_delayed_work(&eventptr->work, 0); } } } diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 33aff4f4a47..256207b71dc 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -58,8 +58,8 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) INIT_LIST_HEAD(&softmac->events); mutex_init(&softmac->associnfo.mutex); - INIT_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work, softmac); - INIT_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout, softmac); + INIT_DELAYED_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work); + INIT_DELAYED_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout); softmac->start_scan = ieee80211softmac_start_scan_implementation; softmac->wait_for_scan = ieee80211softmac_wait_for_scan_implementation; softmac->stop_scan = ieee80211softmac_stop_scan_implementation; diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h index 0642e090b8a..4c2bba34d32 100644 --- a/net/ieee80211/softmac/ieee80211softmac_priv.h +++ b/net/ieee80211/softmac/ieee80211softmac_priv.h @@ -78,7 +78,7 @@ /* private definitions and prototypes */ /*** prototypes from _scan.c */ -void ieee80211softmac_scan(void *sm); +void ieee80211softmac_scan(struct work_struct *work); /* for internal use if scanning is needed */ int ieee80211softmac_start_scan(struct ieee80211softmac_device *mac); void ieee80211softmac_stop_scan(struct ieee80211softmac_device *mac); @@ -149,7 +149,7 @@ int ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *au int ieee80211softmac_deauth_resp(struct net_device *dev, struct ieee80211_deauth *deauth); /*** prototypes from _assoc.c */ -void ieee80211softmac_assoc_work(void *d); +void ieee80211softmac_assoc_work(struct work_struct *work); int ieee80211softmac_handle_assoc_response(struct net_device * dev, struct ieee80211_assoc_response * resp, struct ieee80211_network * network); @@ -157,7 +157,7 @@ int ieee80211softmac_handle_disassoc(struct net_device * dev, struct ieee80211_disassoc * disassoc); int ieee80211softmac_handle_reassoc_req(struct net_device * dev, struct ieee80211_reassoc_request * reassoc); -void ieee80211softmac_assoc_timeout(void *d); +void ieee80211softmac_assoc_timeout(struct work_struct *work); void ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reason); void ieee80211softmac_disassoc(struct ieee80211softmac_device *mac); @@ -207,7 +207,7 @@ struct ieee80211softmac_auth_queue_item { struct ieee80211softmac_device *mac; /* SoftMAC device */ u8 retry; /* Retry limit */ u8 state; /* Auth State */ - struct work_struct work; /* Work queue */ + struct delayed_work work; /* Work queue */ }; /* scanning information */ @@ -219,7 +219,8 @@ struct ieee80211softmac_scaninfo { stop:1; u8 skip_flags; struct completion finished; - struct work_struct softmac_scan; + struct delayed_work softmac_scan; + struct ieee80211softmac_device *mac; }; /* private event struct */ @@ -227,7 +228,7 @@ struct ieee80211softmac_event { struct list_head list; int event_type; void *event_context; - struct work_struct work; + struct delayed_work work; notify_function_ptr fun; void *context; struct ieee80211softmac_device *mac; @@ -238,4 +239,6 @@ void ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, in int ieee80211softmac_notify_internal(struct ieee80211softmac_device *mac, int event, void *event_context, notify_function_ptr fun, void *context, gfp_t gfp_mask); +void ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac); + #endif /* IEEE80211SOFTMAC_PRIV_H_ */ diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c index d31cf77498c..0c85d6c24cd 100644 --- a/net/ieee80211/softmac/ieee80211softmac_scan.c +++ b/net/ieee80211/softmac/ieee80211softmac_scan.c @@ -47,7 +47,6 @@ ieee80211softmac_start_scan(struct ieee80211softmac_device *sm) sm->scanning = 1; spin_unlock_irqrestore(&sm->lock, flags); - netif_tx_disable(sm->ieee->dev); ret = sm->start_scan(sm->dev); if (ret) { spin_lock_irqsave(&sm->lock, flags); @@ -91,12 +90,14 @@ ieee80211softmac_wait_for_scan(struct ieee80211softmac_device *sm) /* internal scanning implementation follows */ -void ieee80211softmac_scan(void *d) +void ieee80211softmac_scan(struct work_struct *work) { int invalid_channel; u8 current_channel_idx; - struct ieee80211softmac_device *sm = (struct ieee80211softmac_device *)d; - struct ieee80211softmac_scaninfo *si = sm->scaninfo; + struct ieee80211softmac_scaninfo *si = + container_of(work, struct ieee80211softmac_scaninfo, + softmac_scan.work); + struct ieee80211softmac_device *sm = si->mac; unsigned long flags; while (!(si->stop) && (si->current_channel_idx < si->number_channels)) { @@ -135,7 +136,8 @@ void ieee80211softmac_scan(void *d) si->started = 0; spin_unlock_irqrestore(&sm->lock, flags); - dprintk(PFX "Scanning finished\n"); + dprintk(PFX "Scanning finished: scanned %d channels starting with channel %d\n", + sm->scaninfo->number_channels, sm->scaninfo->channels[0].channel); ieee80211softmac_scan_finished(sm); complete_all(&sm->scaninfo->finished); } @@ -146,7 +148,8 @@ static inline struct ieee80211softmac_scaninfo *allocate_scaninfo(struct ieee802 struct ieee80211softmac_scaninfo *info = kmalloc(sizeof(struct ieee80211softmac_scaninfo), GFP_ATOMIC); if (unlikely(!info)) return NULL; - INIT_WORK(&info->softmac_scan, ieee80211softmac_scan, mac); + INIT_DELAYED_WORK(&info->softmac_scan, ieee80211softmac_scan); + info->mac = mac; init_completion(&info->finished); return info; } @@ -183,13 +186,11 @@ int ieee80211softmac_start_scan_implementation(struct net_device *dev) sm->scaninfo->channels = sm->ieee->geo.bg; sm->scaninfo->number_channels = sm->ieee->geo.bg_channels; } - dprintk(PFX "Start scanning with channel: %d\n", sm->scaninfo->channels[0].channel); - dprintk(PFX "Scanning %d channels\n", sm->scaninfo->number_channels); sm->scaninfo->current_channel_idx = 0; sm->scaninfo->started = 1; sm->scaninfo->stop = 0; INIT_COMPLETION(sm->scaninfo->finished); - schedule_work(&sm->scaninfo->softmac_scan); + schedule_delayed_work(&sm->scaninfo->softmac_scan, 0); spin_unlock_irqrestore(&sm->lock, flags); return 0; } @@ -248,7 +249,6 @@ void ieee80211softmac_scan_finished(struct ieee80211softmac_device *sm) if (net) sm->set_channel(sm->dev, net->channel); } - netif_wake_queue(sm->ieee->dev); ieee80211softmac_call_events(sm, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, NULL); } EXPORT_SYMBOL_GPL(ieee80211softmac_scan_finished); diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 23068a830f7..480d72c7a42 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -122,7 +122,7 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, sm->associnfo.associating = 1; /* queue lower level code to do work (if necessary) */ - schedule_work(&sm->associnfo.work); + schedule_delayed_work(&sm->associnfo.work, 0); out: mutex_unlock(&sm->associnfo.mutex); @@ -356,7 +356,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* force reassociation */ mac->associnfo.bssvalid = 0; if (mac->associnfo.associated) - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); } else if (is_zero_ether_addr(data->ap_addr.sa_data)) { /* the bssid we have is no longer fixed */ mac->associnfo.bssfixed = 0; @@ -373,7 +373,7 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, /* tell the other code that this bssid should be used no matter what */ mac->associnfo.bssfixed = 1; /* queue associate if new bssid or (old one again and not associated) */ - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); } out: @@ -495,7 +495,8 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev, printk(KERN_DEBUG PFX "wx_set_mlme: we should know the net here...\n"); goto out; } - return ieee80211softmac_deauth_req(mac, net, reason); + err = ieee80211softmac_deauth_req(mac, net, reason); + goto out; case IW_MLME_DISASSOC: ieee80211softmac_send_disassoc_req(mac, reason); mac->associnfo.associated = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5572071af73..503e7059e31 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -104,13 +104,6 @@ config IP_MULTIPLE_TABLES If unsure, say N. -config IP_ROUTE_FWMARK - bool "IP: use netfilter MARK value as routing key" - depends on IP_MULTIPLE_TABLES && NETFILTER - help - If you say Y here, you will be able to specify different routes for - packets with different mark values (see iptables(8), MARK target). - config IP_ROUTE_MULTIPATH bool "IP: equal cost multipath" depends on IP_ADVANCED_ROUTER @@ -625,5 +618,17 @@ config DEFAULT_TCP_CONG default "reno" if DEFAULT_RENO default "cubic" +config TCP_MD5SIG + bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)" + depends on EXPERIMENTAL + select CRYPTO + select CRYPTO_MD5 + ---help--- + RFC2385 specifices a method of giving MD5 protection to TCP sessions. + Its main (only?) use is to protect BGP sessions between core routers + on the Internet. + + If unsure, say N. + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15645c51520..7a068626fee 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,8 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ + datagram.o raw.o udp.o udplite.o \ + arp.o icmp.o devinet.o af_inet.o igmp.o \ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edcf0932ac6..1144900d37f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -104,6 +104,7 @@ #include <net/inet_connection_sock.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/udplite.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/raw.h> @@ -204,7 +205,7 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); + err = inet_csk_listen_start(sk, backlog); if (err) goto out; } @@ -643,7 +644,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_port = inet->dport; sin->sin_addr.s_addr = inet->daddr; } else { - __u32 addr = inet->rcv_saddr; + __be32 addr = inet->rcv_saddr; if (!addr) addr = inet->saddr; sin->sin_port = inet->sport; @@ -994,8 +995,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct inet_sock *inet = inet_sk(sk); int err; struct rtable *rt; - __u32 old_saddr = inet->saddr; - __u32 new_saddr; + __be32 old_saddr = inet->saddr; + __be32 new_saddr; __be32 daddr = inet->daddr; if (inet->opt && inet->opt->srr) @@ -1223,10 +1224,13 @@ static int __init init_ipv4_mibs(void) tcp_statistics[1] = alloc_percpu(struct tcp_mib); udp_statistics[0] = alloc_percpu(struct udp_mib); udp_statistics[1] = alloc_percpu(struct udp_mib); + udplite_statistics[0] = alloc_percpu(struct udp_mib); + udplite_statistics[1] = alloc_percpu(struct udp_mib); if (! (net_statistics[0] && net_statistics[1] && ip_statistics[0] && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] - && udp_statistics[0] && udp_statistics[1])) + && udp_statistics[0] && udp_statistics[1] + && udplite_statistics[0] && udplite_statistics[1] ) ) return -ENOMEM; (void) tcp_mib_init(); @@ -1313,6 +1317,8 @@ static int __init inet_init(void) /* Setup TCP slab cache for open requests. */ tcp_init(); + /* Add UDP-Lite (RFC 3828) */ + udplite4_register(); /* * Set the ICMP layer up diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 99542977e47..67a5509e26f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -14,7 +14,7 @@ * into IP header for icv calculation. Options are already checked * for validity, so paranoia is not required. */ -static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) +static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) { unsigned char * optptr = (unsigned char*)(iph+1); int l = iph->ihl*4 - sizeof(struct iphdr); @@ -162,7 +162,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) iph->frag_off = 0; iph->check = 0; if (ihl > sizeof(*iph)) { - u32 dummy; + __be32 dummy; if (ip_clear_mutable_options(iph, &dummy)) goto out; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cfb5d3de9c8..3981e8be9ab 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -203,7 +203,7 @@ struct neigh_table arp_tbl = { .gc_thresh3 = 1024, }; -int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir) +int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e2077a3aa8c..60aafb4a8ad 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -319,6 +319,7 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->activity += 1; atomic_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; + secattr->flags |= NETLBL_SECATTR_CACHE; if (prev_entry == NULL) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -377,12 +378,11 @@ int cipso_v4_cache_add(const struct sk_buff *skb, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; - entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); + entry->key = kmemdup(cipso_ptr, cipso_ptr_len, GFP_ATOMIC); if (entry->key == NULL) { ret_val = -ENOMEM; goto cache_add_failure; } - memcpy(entry->key, cipso_ptr, cipso_ptr_len); entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); atomic_inc(&secattr->cache->refcount); @@ -447,8 +447,30 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) */ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) { + u32 iter; + if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) return -EINVAL; + for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) { + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + break; + case CIPSO_V4_TAG_RANGE: + if (doi_def->type != CIPSO_V4_MAP_PASS) + return -EINVAL; + break; + case CIPSO_V4_TAG_INVALID: + if (iter == 0) + return -EINVAL; + break; + case CIPSO_V4_TAG_ENUM: + if (doi_def->type != CIPSO_V4_MAP_PASS) + return -EINVAL; + break; + default: + return -EINVAL; + } + } doi_def->valid = 1; INIT_RCU_HEAD(&doi_def->rcu); @@ -805,8 +827,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, /** * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network * @doi_def: the DOI definition - * @host_cat: the category bitmap in host format - * @host_cat_len: the length of the host's category bitmap in bytes + * @secattr: the security attributes * @net_cat: the zero'd out category bitmap in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes * @@ -817,59 +838,51 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, * */ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, - const unsigned char *host_cat, - u32 host_cat_len, + const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int host_spot = -1; - u32 net_spot; + u32 net_spot = CIPSO_V4_INV_CAT; u32 net_spot_max = 0; - u32 host_clen_bits = host_cat_len * 8; u32 net_clen_bits = net_cat_len * 8; - u32 host_cat_size; - u32 *host_cat_array; + u32 host_cat_size = 0; + u32 *host_cat_array = NULL; - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - net_spot_max = host_cat_len; - while (net_spot_max > 0 && host_cat[net_spot_max - 1] == 0) - net_spot_max--; - if (net_spot_max > net_cat_len) - return -EINVAL; - memcpy(net_cat, host_cat, net_spot_max); - return net_spot_max; - case CIPSO_V4_MAP_STD: + if (doi_def->type == CIPSO_V4_MAP_STD) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; - for (;;) { - host_spot = cipso_v4_bitmap_walk(host_cat, - host_clen_bits, - host_spot + 1, - 1); - if (host_spot < 0) - break; + } + + for (;;) { + host_spot = netlbl_secattr_catmap_walk(secattr->mls_cat, + host_spot + 1); + if (host_spot < 0) + break; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + net_spot = host_spot; + break; + case CIPSO_V4_MAP_STD: if (host_spot >= host_cat_size) return -EPERM; - net_spot = host_cat_array[host_spot]; - if (net_spot >= net_clen_bits) - return -ENOSPC; - cipso_v4_bitmap_setbit(net_cat, net_spot, 1); - - if (net_spot > net_spot_max) - net_spot_max = net_spot; + if (net_spot >= CIPSO_V4_INV_CAT) + return -EPERM; + break; } + if (net_spot >= net_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(net_cat, net_spot, 1); - if (host_spot == -2) - return -EFAULT; - - if (++net_spot_max % 8) - return net_spot_max / 8 + 1; - return net_spot_max / 8; + if (net_spot > net_spot_max) + net_spot_max = net_spot; } - return -EINVAL; + if (++net_spot_max % 8) + return net_spot_max / 8 + 1; + return net_spot_max / 8; } /** @@ -877,102 +890,333 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, * @doi_def: the DOI definition * @net_cat: the category bitmap in network/CIPSO format * @net_cat_len: the length of the CIPSO bitmap in bytes - * @host_cat: the zero'd out category bitmap in host format - * @host_cat_len: the length of the host's category bitmap in bytes + * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CIPSO bitmap to the correct local - * MLS category bitmap using the given DOI definition. Returns the minimum - * size in bytes of the host bitmap on success, negative values otherwise. + * MLS category bitmap using the given DOI definition. Returns zero on + * success, negative values on failure. * */ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, - unsigned char *host_cat, - u32 host_cat_len) + struct netlbl_lsm_secattr *secattr) { - u32 host_spot; - u32 host_spot_max = 0; + int ret_val; int net_spot = -1; + u32 host_spot = CIPSO_V4_INV_CAT; u32 net_clen_bits = net_cat_len * 8; - u32 host_clen_bits = host_cat_len * 8; - u32 net_cat_size; - u32 *net_cat_array; + u32 net_cat_size = 0; + u32 *net_cat_array = NULL; - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - if (net_cat_len > host_cat_len) - return -EINVAL; - memcpy(host_cat, net_cat, net_cat_len); - return net_cat_len; - case CIPSO_V4_MAP_STD: + if (doi_def->type == CIPSO_V4_MAP_STD) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; - for (;;) { - net_spot = cipso_v4_bitmap_walk(net_cat, - net_clen_bits, - net_spot + 1, - 1); - if (net_spot < 0) - break; - if (net_spot >= net_cat_size || - net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) - return -EPERM; + } - host_spot = net_cat_array[net_spot]; - if (host_spot >= host_clen_bits) - return -ENOSPC; - cipso_v4_bitmap_setbit(host_cat, host_spot, 1); + for (;;) { + net_spot = cipso_v4_bitmap_walk(net_cat, + net_clen_bits, + net_spot + 1, + 1); + if (net_spot < 0) { + if (net_spot == -2) + return -EFAULT; + return 0; + } - if (host_spot > host_spot_max) - host_spot_max = host_spot; + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + host_spot = net_spot; + break; + case CIPSO_V4_MAP_STD: + if (net_spot >= net_cat_size) + return -EPERM; + host_spot = net_cat_array[net_spot]; + if (host_spot >= CIPSO_V4_INV_CAT) + return -EPERM; + break; } + ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, + host_spot, + GFP_ATOMIC); + if (ret_val != 0) + return ret_val; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_enum_valid - Checks to see if the categories are valid + * @doi_def: the DOI definition + * @enumcat: category list + * @enumcat_len: length of the category list in bytes + * + * Description: + * Checks the given categories against the given DOI definition and returns a + * negative value if any of the categories do not have a valid mapping and a + * zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *enumcat, + u32 enumcat_len) +{ + u16 cat; + int cat_prev = -1; + u32 iter; + + if (doi_def->type != CIPSO_V4_MAP_PASS || enumcat_len & 0x01) + return -EFAULT; - if (net_spot == -2) + for (iter = 0; iter < enumcat_len; iter += 2) { + cat = ntohs(*((__be16 *)&enumcat[iter])); + if (cat <= cat_prev) return -EFAULT; + cat_prev = cat; + } + + return 0; +} + +/** + * cipso_v4_map_cat_enum_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @secattr: the security attributes + * @net_cat: the zero'd out category list in network/CIPSO format + * @net_cat_len: the length of the CIPSO category list in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO category list using the given DOI definition. Returns the + * size in bytes of the network category bitmap on success, negative values + * otherwise. + * + */ +static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *net_cat, + u32 net_cat_len) +{ + int cat = -1; + u32 cat_iter = 0; - if (++host_spot_max % 8) - return host_spot_max / 8 + 1; - return host_spot_max / 8; + for (;;) { + cat = netlbl_secattr_catmap_walk(secattr->mls_cat, cat + 1); + if (cat < 0) + break; + if ((cat_iter + 2) > net_cat_len) + return -ENOSPC; + + *((__be16 *)&net_cat[cat_iter]) = htons(cat); + cat_iter += 2; } - return -EINVAL; + return cat_iter; +} + +/** + * cipso_v4_map_cat_enum_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category list in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @secattr: the security attributes + * + * Description: + * Perform a label mapping to translate a CIPSO category list to the correct + * local MLS category bitmap using the given DOI definition. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u32 iter; + + for (iter = 0; iter < net_cat_len; iter += 2) { + ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, + ntohs(*((__be16 *)&net_cat[iter])), + GFP_ATOMIC); + if (ret_val != 0) + return ret_val; + } + + return 0; +} + +/** + * cipso_v4_map_cat_rng_valid - Checks to see if the categories are valid + * @doi_def: the DOI definition + * @rngcat: category list + * @rngcat_len: length of the category list in bytes + * + * Description: + * Checks the given categories against the given DOI definition and returns a + * negative value if any of the categories do not have a valid mapping and a + * zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *rngcat, + u32 rngcat_len) +{ + u16 cat_high; + u16 cat_low; + u32 cat_prev = CIPSO_V4_MAX_REM_CATS + 1; + u32 iter; + + if (doi_def->type != CIPSO_V4_MAP_PASS || rngcat_len & 0x01) + return -EFAULT; + + for (iter = 0; iter < rngcat_len; iter += 4) { + cat_high = ntohs(*((__be16 *)&rngcat[iter])); + if ((iter + 4) <= rngcat_len) + cat_low = ntohs(*((__be16 *)&rngcat[iter + 2])); + else + cat_low = 0; + + if (cat_high > cat_prev) + return -EFAULT; + + cat_prev = cat_low; + } + + return 0; +} + +/** + * cipso_v4_map_cat_rng_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @secattr: the security attributes + * @net_cat: the zero'd out category list in network/CIPSO format + * @net_cat_len: the length of the CIPSO category list in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO category list using the given DOI definition. Returns the + * size in bytes of the network category bitmap on success, negative values + * otherwise. + * + */ +static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *net_cat, + u32 net_cat_len) +{ + /* The constant '16' is not random, it is the maximum number of + * high/low category range pairs as permitted by the CIPSO draft based + * on a maximum IPv4 header length of 60 bytes - the BUG_ON() assertion + * does a sanity check to make sure we don't overflow the array. */ + int iter = -1; + u16 array[16]; + u32 array_cnt = 0; + u32 cat_size = 0; + + BUG_ON(net_cat_len > 30); + + for (;;) { + iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1); + if (iter < 0) + break; + cat_size += (iter == 0 ? 0 : sizeof(u16)); + if (cat_size > net_cat_len) + return -ENOSPC; + array[array_cnt++] = iter; + + iter = netlbl_secattr_catmap_walk_rng(secattr->mls_cat, iter); + if (iter < 0) + return -EFAULT; + cat_size += sizeof(u16); + if (cat_size > net_cat_len) + return -ENOSPC; + array[array_cnt++] = iter; + } + + for (iter = 0; array_cnt > 0;) { + *((__be16 *)&net_cat[iter]) = htons(array[--array_cnt]); + iter += 2; + array_cnt--; + if (array[array_cnt] != 0) { + *((__be16 *)&net_cat[iter]) = htons(array[array_cnt]); + iter += 2; + } + } + + return cat_size; +} + +/** + * cipso_v4_map_cat_rng_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category list in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @secattr: the security attributes + * + * Description: + * Perform a label mapping to translate a CIPSO category list to the correct + * local MLS category bitmap using the given DOI definition. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u32 net_iter; + u16 cat_low; + u16 cat_high; + + for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) { + cat_high = ntohs(*((__be16 *)&net_cat[net_iter])); + if ((net_iter + 4) <= net_cat_len) + cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2])); + else + cat_low = 0; + + ret_val = netlbl_secattr_catmap_setrng(secattr->mls_cat, + cat_low, + cat_high, + GFP_ATOMIC); + if (ret_val != 0) + return ret_val; + } + + return 0; } /* * Protocol Handling Functions */ +#define CIPSO_V4_OPT_LEN_MAX 40 #define CIPSO_V4_HDR_LEN 6 /** * cipso_v4_gentag_hdr - Generate a CIPSO option header * @doi_def: the DOI definition - * @len: the total tag length in bytes + * @len: the total tag length in bytes, not including this header * @buf: the CIPSO option buffer * * Description: - * Write a CIPSO header into the beginning of @buffer. Return zero on success, - * negative values on failure. + * Write a CIPSO header into the beginning of @buffer. * */ -static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, - u32 len, - unsigned char *buf) +static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, + unsigned char *buf, + u32 len) { - if (CIPSO_V4_HDR_LEN + len > 40) - return -ENOSPC; - buf[0] = IPOPT_CIPSO; buf[1] = CIPSO_V4_HDR_LEN + len; - *(u32 *)&buf[2] = htonl(doi_def->doi); - - return 0; + *(__be32 *)&buf[2] = htonl(doi_def->doi); } -#define CIPSO_V4_TAG1_CAT_LEN 30 - /** * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) * @doi_def: the DOI definition @@ -983,83 +1227,249 @@ static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, * Description: * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The * actual buffer length may be larger than the indicated size due to - * translation between host and network category bitmaps. Returns zero on - * success, negative values on failure. + * translation between host and network category bitmaps. Returns the size of + * the tag on success, negative values on failure. * */ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr, - unsigned char **buffer, - u32 *buffer_len) + unsigned char *buffer, + u32 buffer_len) { - int ret_val = -EPERM; - unsigned char *buf = NULL; - u32 buf_len; + int ret_val; + u32 tag_len; u32 level; - if (secattr->mls_cat) { - buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, - GFP_ATOMIC); - if (buf == NULL) - return -ENOMEM; + if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) + return -EPERM; + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + return ret_val; + if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = cipso_v4_map_cat_rbm_hton(doi_def, - secattr->mls_cat, - secattr->mls_cat_len, - &buf[CIPSO_V4_HDR_LEN + 4], - CIPSO_V4_TAG1_CAT_LEN); + secattr, + &buffer[4], + buffer_len - 4); if (ret_val < 0) - goto gentag_failure; + return ret_val; /* This will send packets using the "optimized" format when * possibile as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) - ret_val = 10; + if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + tag_len = 14; + else + tag_len = 4 + ret_val; + } else + tag_len = 4; + + buffer[0] = 0x01; + buffer[1] = tag_len; + buffer[3] = level; + + return tag_len; +} - buf_len = 4 + ret_val; - } else { - buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); - if (buf == NULL) +/** + * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; + + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); + if (ret_val != 0) + return ret_val; + secattr->mls_lvl = level; + secattr->flags |= NETLBL_SECATTR_MLS_LVL; + + if (tag_len > 4) { + secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); + if (secattr->mls_cat == NULL) return -ENOMEM; - buf_len = 4; + + ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr); + if (ret_val != 0) { + netlbl_secattr_catmap_free(secattr->mls_cat); + return ret_val; + } + + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } + return 0; +} + +/** + * cipso_v4_gentag_enum - Generate a CIPSO enumerated tag (type #2) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the enumerated tag, tag type #2. Returns the + * size of the tag on success, negative values on failure. + * + */ +static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + int ret_val; + u32 tag_len; + u32 level; + + if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) + return -EPERM; + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); if (ret_val != 0) - goto gentag_failure; + return ret_val; + + if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { + ret_val = cipso_v4_map_cat_enum_hton(doi_def, + secattr, + &buffer[4], + buffer_len - 4); + if (ret_val < 0) + return ret_val; + + tag_len = 4 + ret_val; + } else + tag_len = 4; + + buffer[0] = 0x02; + buffer[1] = tag_len; + buffer[3] = level; + + return tag_len; +} + +/** + * cipso_v4_parsetag_enum - Parse a CIPSO enumerated tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO enumerated tag (tag type #2) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; - ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) - goto gentag_failure; + return ret_val; + secattr->mls_lvl = level; + secattr->flags |= NETLBL_SECATTR_MLS_LVL; - buf[CIPSO_V4_HDR_LEN] = 0x01; - buf[CIPSO_V4_HDR_LEN + 1] = buf_len; - buf[CIPSO_V4_HDR_LEN + 3] = level; + if (tag_len > 4) { + secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); + if (secattr->mls_cat == NULL) + return -ENOMEM; - *buffer = buf; - *buffer_len = CIPSO_V4_HDR_LEN + buf_len; + ret_val = cipso_v4_map_cat_enum_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr); + if (ret_val != 0) { + netlbl_secattr_catmap_free(secattr->mls_cat); + return ret_val; + } + + secattr->flags |= NETLBL_SECATTR_MLS_CAT; + } return 0; +} -gentag_failure: - kfree(buf); - return ret_val; +/** + * cipso_v4_gentag_rng - Generate a CIPSO ranged tag (type #5) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the ranged tag, tag type #5. Returns the + * size of the tag on success, negative values on failure. + * + */ +static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + int ret_val; + u32 tag_len; + u32 level; + + if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) + return -EPERM; + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + return ret_val; + + if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { + ret_val = cipso_v4_map_cat_rng_hton(doi_def, + secattr, + &buffer[4], + buffer_len - 4); + if (ret_val < 0) + return ret_val; + + tag_len = 4 + ret_val; + } else + tag_len = 4; + + buffer[0] = 0x05; + buffer[1] = tag_len; + buffer[3] = level; + + return tag_len; } /** - * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * cipso_v4_parsetag_rng - Parse a CIPSO ranged tag * @doi_def: the DOI definition * @tag: the CIPSO tag * @secattr: the security attributes * * Description: - * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security - * attributes in @secattr. Return zero on success, negatives values on - * failure. + * Parse a CIPSO ranged tag (tag type #5) and return the security attributes + * in @secattr. Return zero on success, negatives values on failure. * */ -static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, +static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, const unsigned char *tag, struct netlbl_lsm_secattr *secattr) { @@ -1071,32 +1481,23 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, if (ret_val != 0) return ret_val; secattr->mls_lvl = level; - secattr->mls_lvl_vld = 1; + secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - secattr->mls_cat_len = tag_len - 4; - break; - case CIPSO_V4_MAP_STD: - secattr->mls_cat_len = - doi_def->map.std->cat.local_size; - break; - } - secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); + secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->mls_cat == NULL) return -ENOMEM; - ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + ret_val = cipso_v4_map_cat_rng_ntoh(doi_def, &tag[4], tag_len - 4, - secattr->mls_cat, - secattr->mls_cat_len); - if (ret_val < 0) { - kfree(secattr->mls_cat); + secattr); + if (ret_val != 0) { + netlbl_secattr_catmap_free(secattr->mls_cat); return ret_val; } - secattr->mls_cat_len = ret_val; + + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; @@ -1140,7 +1541,7 @@ int cipso_v4_validate(unsigned char **option) } rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); + doi_def = cipso_v4_doi_search(ntohl(*((__be32 *)&opt[2]))); if (doi_def == NULL) { err_offset = 2; goto validate_return_locked; @@ -1191,6 +1592,44 @@ int cipso_v4_validate(unsigned char **option) } } break; + case CIPSO_V4_TAG_ENUM: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_enum_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + break; + case CIPSO_V4_TAG_RANGE: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_rng_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + break; default: err_offset = opt_iter; goto validate_return_locked; @@ -1265,7 +1704,7 @@ int cipso_v4_socket_setattr(const struct socket *sock, { int ret_val = -EPERM; u32 iter; - unsigned char *buf = NULL; + unsigned char *buf; u32 buf_len = 0; u32 opt_len; struct ip_options *opt = NULL; @@ -1281,17 +1720,40 @@ int cipso_v4_socket_setattr(const struct socket *sock, if (sk == NULL) return 0; + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC * tags right now it is a safe assumption. */ iter = 0; do { + memset(buf, 0, buf_len); switch (doi_def->tags[iter]) { case CIPSO_V4_TAG_RBITMAP: ret_val = cipso_v4_gentag_rbm(doi_def, - secattr, - &buf, - &buf_len); + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; + case CIPSO_V4_TAG_ENUM: + ret_val = cipso_v4_gentag_enum(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; + case CIPSO_V4_TAG_RANGE: + ret_val = cipso_v4_gentag_rng(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); break; default: ret_val = -EPERM; @@ -1299,15 +1761,18 @@ int cipso_v4_socket_setattr(const struct socket *sock, } iter++; - } while (ret_val != 0 && + } while (ret_val < 0 && iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); - if (ret_val != 0) + if (ret_val < 0) goto socket_setattr_failure; + cipso_v4_gentag_hdr(doi_def, buf, ret_val); + buf_len = CIPSO_V4_HDR_LEN + ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and - * we can't block here. */ + * we won't always have CAP_NET_RAW even though we _always_ want to + * set the IPOPT_CIPSO option. */ opt_len = (buf_len + 3) & ~3; opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); if (opt == NULL) { @@ -1317,11 +1782,9 @@ int cipso_v4_socket_setattr(const struct socket *sock, memcpy(opt->__data, buf, buf_len); opt->optlen = opt_len; opt->is_data = 1; + opt->cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; - ret_val = ip_options_compile(opt, NULL); - if (ret_val != 0) - goto socket_setattr_failure; sk_inet = inet_sk(sk); if (sk_inet->is_icsk) { @@ -1371,19 +1834,33 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) if (ret_val == 0) return ret_val; - doi = ntohl(*(u32 *)&cipso_ptr[2]); + doi = ntohl(*(__be32 *)&cipso_ptr[2]); rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(doi); + doi_def = cipso_v4_doi_search(doi); if (doi_def == NULL) { rcu_read_unlock(); return -ENOMSG; } + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso_ptr[6], secattr); break; + case CIPSO_V4_TAG_ENUM: + ret_val = cipso_v4_parsetag_enum(doi_def, + &cipso_ptr[6], + secattr); + break; + case CIPSO_V4_TAG_RANGE: + ret_val = cipso_v4_parsetag_rng(doi_def, + &cipso_ptr[6], + secattr); + break; } rcu_read_unlock(); @@ -1431,23 +1908,30 @@ int cipso_v4_skbuff_getattr(const struct sk_buff *skb, u32 doi; struct cipso_v4_doi *doi_def; - if (!CIPSO_V4_OPTEXIST(skb)) - return -ENOMSG; cipso_ptr = CIPSO_V4_OPTPTR(skb); if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) return 0; - doi = ntohl(*(u32 *)&cipso_ptr[2]); + doi = ntohl(*(__be32 *)&cipso_ptr[2]); rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(doi); + doi_def = cipso_v4_doi_search(doi); if (doi_def == NULL) goto skbuff_getattr_return; + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso_ptr[6], secattr); break; + case CIPSO_V4_TAG_ENUM: + ret_val = cipso_v4_parsetag_enum(doi_def, + &cipso_ptr[6], + secattr); + break; } skbuff_getattr_return: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7602c79a389..84bed40273a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -577,20 +577,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg * Determine a default network mask, based on the IP address. */ -static __inline__ int inet_abc_len(u32 addr) +static __inline__ int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. */ if (ZERONET(addr)) rc = 0; else { - addr = ntohl(addr); + __u32 haddr = ntohl(addr); - if (IN_CLASSA(addr)) + if (IN_CLASSA(haddr)) rc = 8; - else if (IN_CLASSB(addr)) + else if (IN_CLASSB(haddr)) rc = 16; - else if (IN_CLASSC(addr)) + else if (IN_CLASSC(haddr)) rc = 24; } @@ -1120,6 +1120,16 @@ static struct notifier_block ip_netdev_notifier = { .notifier_call =inetdev_event, }; +static inline size_t inet_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(4) /* IFA_ADDRESS */ + + nla_total_size(4) /* IFA_LOCAL */ + + nla_total_size(4) /* IFA_BROADCAST */ + + nla_total_size(4) /* IFA_ANYCAST */ + + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { @@ -1208,15 +1218,13 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, u32 seq = nlh ? nlh->nlmsg_seq : 0; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in inet_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: @@ -1295,8 +1303,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -1556,12 +1563,12 @@ static void devinet_sysctl_register(struct in_device *in_dev, { int i; struct net_device *dev = in_dev ? in_dev->dev : NULL; - struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t), + GFP_KERNEL); char *dev_name = NULL; if (!t) return; - memcpy(t, &devinet_sysctl, sizeof(*t)); for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].de = NULL; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b5c205b5766..f2c6776ea0e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -67,7 +67,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; struct udphdr *uh; - u32 *udpdata32; + __be32 *udpdata32; uh = (struct udphdr *)esph; uh->source = encap->encap_sport; @@ -81,7 +81,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph = (struct ip_esp_hdr *)(uh + 1); break; case UDP_ENCAP_ESPINUDP_NON_IKE: - udpdata32 = (u32 *)(uh + 1); + udpdata32 = (__be32 *)(uh + 1); udpdata32[0] = udpdata32[1] = 0; esph = (struct ip_esp_hdr *)(udpdata32 + 2); break; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index af0190d8b6c..d47b72af89e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -768,8 +768,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) { struct fib_result res; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr, - .fwmark = frn->fl_fwmark, + struct flowi fl = { .mark = frn->fl_mark, + .nl_u = { .ip4_u = { .daddr = frn->fl_addr, .tos = frn->fl_tos, .scope = frn->fl_scope } } }; if (tb) { @@ -811,7 +811,6 @@ static void nl_fib_input(struct sock *sk, int len) pid = nlh->nlmsg_pid; /*pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ - NETLINK_CB(skb).dst_pid = pid; NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 107bb6cbb0b..648f47c1c39 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -45,8 +45,8 @@ #include "fib_lookup.h" -static kmem_cache_t *fn_hash_kmem __read_mostly; -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_hash_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; @@ -485,13 +485,13 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; new_f = NULL; if (!f) { - new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out_free_new_fa; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0852b9cd065..b837c33e040 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -44,10 +44,6 @@ struct fib4_rule __be32 srcmask; __be32 dst; __be32 dstmask; -#ifdef CONFIG_IP_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; -#endif #ifdef CONFIG_NET_CLS_ROUTE u32 tclassid; #endif @@ -160,11 +156,6 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tos && (r->tos != fl->fl4_tos)) return 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) - return 0; -#endif - return 1; } @@ -179,14 +170,10 @@ static struct fib_table *fib_empty_table(void) } static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, + FRA_GENERIC_POLICY, [FRA_SRC] = { .type = NLA_U32 }, [FRA_DST] = { .type = NLA_U32 }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, @@ -220,20 +207,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_DST]) rule4->dst = nla_get_be32(tb[FRA_DST]); -#ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (rule4->fwmark) - /* compatibility: if the mark value is non-zero all bits - * are compared unless a mask is explicitly specified. - */ - rule4->fwmask = 0xFFFFFFFF; - } - - if (tb[FRA_FWMASK]) - rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif - #ifdef CONFIG_NET_CLS_ROUTE if (tb[FRA_FLOW]) rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); @@ -264,14 +237,6 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->tos && (rule4->tos != frh->tos)) return 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; -#endif - #ifdef CONFIG_NET_CLS_ROUTE if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) return 0; @@ -296,14 +261,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->src_len = rule4->src_len; frh->tos = rule4->tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (rule4->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); - - if (rule4->fwmask || rule4->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); -#endif - if (rule4->dst_len) NLA_PUT_BE32(skb, FRA_DST, rule4->dst); @@ -342,6 +299,13 @@ static u32 fib4_rule_default_pref(void) return 0; } +static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) +{ + return nla_total_size(4) /* dst */ + + nla_total_size(4) /* src */ + + nla_total_size(4); /* flow */ +} + static struct fib_rules_ops fib4_rules_ops = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), @@ -351,6 +315,7 @@ static struct fib_rules_ops fib4_rules_ops = { .compare = fib4_rule_compare, .fill = fib4_rule_fill, .default_pref = fib4_rule_default_pref, + .nlmsg_payload = fib4_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, .rules_list = &fib4_rules, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 884d176e008..e63b8a98fb4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -273,25 +273,49 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) return -1; } +static inline size_t fib_nlmsg_size(struct fib_info *fi) +{ + size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(4) /* RTA_TABLE */ + + nla_total_size(4) /* RTA_DST */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(4); /* RTA_PREFSRC */ + + /* space for nested metrics */ + payload += nla_total_size((RTAX_MAX * nla_total_size(4))); + + if (fi->fib_nhs) { + /* Also handles the special case fib_nhs == 1 */ + + /* each nexthop is packed in an attribute */ + size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); + + /* may contain flow and gateway attribute */ + nhsize += 2 * nla_total_size(4); + + /* all nexthops are packed in a nested attribute */ + payload += nla_total_size(fi->fib_nhs * nhsize); + } + + return payload; +} + void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; - int payload = sizeof(struct rtmsg) + 256; u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; int err = -ENOBUFS; - skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_dump_info(skb, info->pid, seq, event, tb_id, fa->fa_type, fa->fa_scope, key, dst_len, fa->fa_tos, fa->fa_info, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in fib_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d17990ec724..cfb249cc0a5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -172,7 +172,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -1187,7 +1187,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) u8 state; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; @@ -1232,7 +1232,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b39a37a4754..40cf0d0e1b8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -332,7 +332,7 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct icmp_bxm *icmp_param = (struct icmp_bxm *)from; - unsigned int csum; + __wsum csum; csum = skb_copy_and_csum_bits(icmp_param->skb, icmp_param->offset + offset, @@ -356,7 +356,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, ip_flush_pending_frames(icmp_socket->sk); else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = skb->h.icmph; - unsigned int csum = 0; + __wsum csum = 0; struct sk_buff *skb1; skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { @@ -931,7 +931,7 @@ int icmp_rcv(struct sk_buff *skb) switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (!(u16)csum_fold(skb->csum)) + if (!csum_fold(skb->csum)) break; /* fall through */ case CHECKSUM_NONE: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6eee71647b7..0017ccb01d6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -932,7 +932,7 @@ int igmp_rcv(struct sk_buff *skb) switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (!(u16)csum_fold(skb->csum)) + if (!csum_fold(skb->csum)) break; /* fall through */ case CHECKSUM_NONE: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 96bbe2a0aa1..9d68837888d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -343,7 +343,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, - const u32 rnd, const u16 synq_hsize) + const u32 rnd, const u32 synq_hsize) { return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 244c4f445c7..8c79c8a4ea5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -27,11 +27,11 @@ * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ -struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, +struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, const unsigned short snum) { - struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { tb->port = snum; @@ -45,7 +45,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, /* * Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) +void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index cdd805344c6..9f414e35c48 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -91,7 +91,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat { struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - SLAB_ATOMIC); + GFP_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); @@ -178,7 +178,6 @@ void inet_twdr_hangman(unsigned long data) need_timer = 0; if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { twdr->thread_slots |= (1 << twdr->slot); - mb(); schedule_work(&twdr->twkill_work); need_timer = 1; } else { @@ -197,9 +196,10 @@ EXPORT_SYMBOL_GPL(inet_twdr_hangman); extern void twkill_slots_invalid(void); -void inet_twdr_twkill_work(void *data) +void inet_twdr_twkill_work(struct work_struct *work) { - struct inet_timewait_death_row *twdr = data; + struct inet_timewait_death_row *twdr = + container_of(work, struct inet_timewait_death_row, twkill_work); int i; if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f072f3875af..711eb6d0285 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -73,7 +73,7 @@ /* Exported for inet_getid inline function. */ DEFINE_SPINLOCK(inet_peer_idlock); -static kmem_cache_t *peer_cachep __read_mostly; +static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height static struct inet_peer peer_fake_node = { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 74046efdf87..8ce00d3703d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -565,7 +565,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } else { struct sk_buff *free_it = next; - /* Old fragmnet is completely overridden with + /* Old fragment is completely overridden with * new one drop it. */ next = next->next; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d5b5dec075b..476cb6084c7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -144,7 +144,7 @@ static struct net_device *ipgre_fb_tunnel_dev; */ #define HASH_SIZE 16 -#define HASH(addr) ((addr^(addr>>4))&0xF) +#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static struct ip_tunnel *tunnels[4][HASH_SIZE]; @@ -157,7 +157,7 @@ static DEFINE_RWLOCK(ipgre_lock); /* Given src, dst and key, find appropriate for input tunnel. */ -static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) +static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key) { unsigned h0 = HASH(remote); unsigned h1 = HASH(key); @@ -194,9 +194,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) { - u32 remote = t->parms.iph.daddr; - u32 local = t->parms.iph.saddr; - u32 key = t->parms.i_key; + __be32 remote = t->parms.iph.daddr; + __be32 local = t->parms.iph.saddr; + __be32 key = t->parms.i_key; unsigned h = HASH(key); int prio = 0; @@ -236,9 +236,9 @@ static void ipgre_tunnel_unlink(struct ip_tunnel *t) static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) { - u32 remote = parms->iph.daddr; - u32 local = parms->iph.saddr; - u32 key = parms->i_key; + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; struct ip_tunnel *t, **tp, *nt; struct net_device *dev; unsigned h = HASH(key); @@ -319,12 +319,12 @@ static void ipgre_err(struct sk_buff *skb, u32 info) */ struct iphdr *iph = (struct iphdr*)skb->data; - u16 *p = (u16*)(skb->data+(iph->ihl<<2)); + __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); int grehlen = (iph->ihl<<2) + 4; int type = skb->h.icmph->type; int code = skb->h.icmph->code; struct ip_tunnel *t; - u16 flags; + __be16 flags; flags = p[0]; if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { @@ -370,7 +370,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) } read_lock(&ipgre_lock); - t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0); if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) goto out; @@ -388,14 +388,14 @@ out: #else struct iphdr *iph = (struct iphdr*)dp; struct iphdr *eiph; - u16 *p = (u16*)(dp+(iph->ihl<<2)); + __be16 *p = (__be16*)(dp+(iph->ihl<<2)); int type = skb->h.icmph->type; int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; __be32 rel_info = 0; __u32 n = 0; - u16 flags; + __be16 flags; int grehlen = (iph->ihl<<2) + 4; struct sk_buff *skb2; struct flowi fl; @@ -556,9 +556,9 @@ static int ipgre_rcv(struct sk_buff *skb) { struct iphdr *iph; u8 *h; - u16 flags; - u16 csum = 0; - u32 key = 0; + __be16 flags; + __sum16 csum = 0; + __be32 key = 0; u32 seqno = 0; struct ip_tunnel *tunnel; int offset = 4; @@ -568,7 +568,7 @@ static int ipgre_rcv(struct sk_buff *skb) iph = skb->nh.iph; h = skb->data; - flags = *(u16*)h; + flags = *(__be16*)h; if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { /* - Version must be 0. @@ -580,7 +580,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - csum = (u16)csum_fold(skb->csum); + csum = csum_fold(skb->csum); if (!csum) break; /* fall through */ @@ -592,11 +592,11 @@ static int ipgre_rcv(struct sk_buff *skb) offset += 4; } if (flags&GRE_KEY) { - key = *(u32*)(h + offset); + key = *(__be32*)(h + offset); offset += 4; } if (flags&GRE_SEQ) { - seqno = ntohl(*(u32*)(h + offset)); + seqno = ntohl(*(__be32*)(h + offset)); offset += 4; } } @@ -605,7 +605,7 @@ static int ipgre_rcv(struct sk_buff *skb) if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { secpath_reset(skb); - skb->protocol = *(u16*)(h + 2); + skb->protocol = *(__be16*)(h + 2); /* WCCP version 1 and 2 protocol decoding. * - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header @@ -673,13 +673,13 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct iphdr *old_iph = skb->nh.iph; struct iphdr *tiph; u8 tos; - u16 df; + __be16 df; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ int gre_hlen; - u32 dst; + __be32 dst; int mtu; if (tunnel->recursion++) { @@ -860,11 +860,11 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); } - ((u16*)(iph+1))[0] = tunnel->parms.o_flags; - ((u16*)(iph+1))[1] = skb->protocol; + ((__be16*)(iph+1))[0] = tunnel->parms.o_flags; + ((__be16*)(iph+1))[1] = skb->protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); + __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); if (tunnel->parms.o_flags&GRE_SEQ) { ++tunnel->o_seqno; @@ -877,7 +877,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (tunnel->parms.o_flags&GRE_CSUM) { *ptr = 0; - *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); + *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); } } @@ -1068,7 +1068,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh { struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - u16 *p = (u16*)(iph+1); + __be16 *p = (__be16*)(iph+1); memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); p[0] = t->parms.o_flags; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 8dabbfc3126..9f02917d6f4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -443,7 +443,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) opt->router_alert = optptr - iph; break; case IPOPT_CIPSO: - if (opt->cipso) { + if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) { pp_ptr = optptr; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fc195a44fc2..f071f84808f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -53,6 +53,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/highmem.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct hh_cache *hh = dst->hh; struct net_device *dev = dst->dev; int hh_len = LL_RESERVED_SPACE(dev); @@ -182,16 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (hh) { - int hh_alen; - - read_lock_bh(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(hh->hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); - read_unlock_bh(&hh->hh_lock); - skb_push(skb, hh->hh_len); - return hh->hh_output(skb); - } else if (dst->neighbour) + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) return dst->neighbour->output(skb); if (net_ratelimit()) @@ -288,9 +281,8 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_queue_xmit(struct sk_buff *skb, int ipfragok) +int ip_queue_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = inet->opt; struct rtable *rt; @@ -342,7 +334,7 @@ packet_routed: /* OK, we know where to send it, allocate and build IP header. */ iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); - *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); + *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); iph->tot_len = htons(skb->len); if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) iph->frag_off = htons(IP_DF); @@ -386,6 +378,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; + to->mark = from->mark; /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; @@ -394,7 +387,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif #ifdef CONFIG_NETFILTER - to->nfmark = from->nfmark; /* Connection association is same as pre-frag packet */ nf_conntrack_put(to->nfct); to->nfct = from->nfct; @@ -683,7 +675,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { - unsigned int csum = 0; + __wsum csum = 0; if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); @@ -691,11 +683,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk return 0; } -static inline unsigned int +static inline __wsum csum_page(struct page *page, int offset, int copy) { char *kaddr; - unsigned int csum; + __wsum csum; kaddr = kmap(page); csum = csum_partial(kaddr + offset, copy, 0); kunmap(page); @@ -1167,7 +1159,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, } if (skb->ip_summed == CHECKSUM_NONE) { - unsigned int csum; + __wsum csum; csum = csum_page(page, offset, len); skb->csum = csum_block_add(skb->csum, csum, skb->len); } @@ -1315,7 +1307,7 @@ void ip_flush_pending_frames(struct sock *sk) static int ip_reply_glue_bits(void *dptr, char *to, int offset, int len, int odd, struct sk_buff *skb) { - unsigned int csum; + __wsum csum; csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0); skb->csum = csum_block_add(skb->csum, csum, odd); @@ -1385,7 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar &ipc, rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) - *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); + *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); skb->ip_summed = CHECKSUM_NONE; ip_push_pending_frames(sk); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4b132953bcc..57d4bae6f08 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -355,7 +355,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) sin = (struct sockaddr_in *)msg->msg_name; if (sin) { sin->sin_family = AF_INET; - sin->sin_addr.s_addr = *(u32*)(skb->nh.raw + serr->addr_offset); + sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 955a07abb91..afa60b9a003 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -101,6 +101,7 @@ #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers - '3' from resolv.h */ +#define NONE __constant_htonl(INADDR_NONE) /* * Public IP configuration @@ -129,19 +130,19 @@ int ic_proto_enabled __initdata = 0 static int ic_host_name_set __initdata = 0; /* Host name set by us? */ -u32 ic_myaddr = INADDR_NONE; /* My IP address */ -static u32 ic_netmask = INADDR_NONE; /* Netmask for local subnet */ -u32 ic_gateway = INADDR_NONE; /* Gateway IP address */ +__be32 ic_myaddr = NONE; /* My IP address */ +static __be32 ic_netmask = NONE; /* Netmask for local subnet */ +__be32 ic_gateway = NONE; /* Gateway IP address */ -u32 ic_servaddr = INADDR_NONE; /* Boot server IP address */ +__be32 ic_servaddr = NONE; /* Boot server IP address */ -u32 root_server_addr = INADDR_NONE; /* Address of NFS server */ +__be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ /* Persistent data: */ static int ic_proto_used; /* Protocol used, if any */ -static u32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ +static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ static u8 ic_domain[64]; /* DNS (not NIS) domain name */ /* @@ -172,7 +173,7 @@ struct ic_device { struct net_device *dev; unsigned short flags; short able; - u32 xid; + __be32 xid; }; static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ @@ -223,7 +224,7 @@ static int __init ic_open_devs(void) d->flags = oflags; d->able = able; if (able & IC_BOOTP) - get_random_bytes(&d->xid, sizeof(u32)); + get_random_bytes(&d->xid, sizeof(__be32)); else d->xid = 0; ic_proto_have_if |= able; @@ -269,7 +270,7 @@ static void __init ic_close_devs(void) */ static inline void -set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port) +set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = addr; @@ -332,7 +333,7 @@ static int __init ic_setup_routes(void) { /* No need to setup device routes, only the default route... */ - if (ic_gateway != INADDR_NONE) { + if (ic_gateway != NONE) { struct rtentry rm; int err; @@ -368,10 +369,10 @@ static int __init ic_defaults(void) if (!ic_host_name_set) sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr)); - if (root_server_addr == INADDR_NONE) + if (root_server_addr == NONE) root_server_addr = ic_servaddr; - if (ic_netmask == INADDR_NONE) { + if (ic_netmask == NONE) { if (IN_CLASSA(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(ic_myaddr))) @@ -420,7 +421,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt { struct arphdr *rarp; unsigned char *rarp_ptr; - u32 sip, tip; + __be32 sip, tip; unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; @@ -485,12 +486,12 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock; /* Discard packets which are not from specified server. */ - if (ic_servaddr != INADDR_NONE && ic_servaddr != sip) + if (ic_servaddr != NONE && ic_servaddr != sip) goto drop_unlock; /* We have a winner! */ ic_dev = dev; - if (ic_myaddr == INADDR_NONE) + if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; ic_got_reply = IC_RARP; @@ -530,13 +531,13 @@ struct bootp_pkt { /* BOOTP packet format */ u8 htype; /* HW address type */ u8 hlen; /* HW address length */ u8 hops; /* Used only by gateways */ - u32 xid; /* Transaction ID */ - u16 secs; /* Seconds since we started */ - u16 flags; /* Just what it says */ - u32 client_ip; /* Client's IP address if known */ - u32 your_ip; /* Assigned IP address */ - u32 server_ip; /* (Next, e.g. NFS) Server's IP address */ - u32 relay_ip; /* IP address of BOOTP relay */ + __be32 xid; /* Transaction ID */ + __be16 secs; /* Seconds since we started */ + __be16 flags; /* Just what it says */ + __be32 client_ip; /* Client's IP address if known */ + __be32 your_ip; /* Assigned IP address */ + __be32 server_ip; /* (Next, e.g. NFS) Server's IP address */ + __be32 relay_ip; /* IP address of BOOTP relay */ u8 hw_addr[16]; /* Client's HW address */ u8 serv_name[64]; /* Server host name */ u8 boot_file[128]; /* Name of boot file */ @@ -576,7 +577,7 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 }; static void __init ic_dhcp_init_options(u8 *options) { - u8 mt = ((ic_servaddr == INADDR_NONE) + u8 mt = ((ic_servaddr == NONE) ? DHCPDISCOVER : DHCPREQUEST); u8 *e = options; @@ -666,7 +667,7 @@ static inline void ic_bootp_init(void) int i; for (i = 0; i < CONF_NAMESERVERS_MAX; i++) - ic_nameservers[i] = INADDR_NONE; + ic_nameservers[i] = NONE; dev_add_pack(&bootp_packet_type); } @@ -708,7 +709,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d h->frag_off = htons(IP_DF); h->ttl = 64; h->protocol = IPPROTO_UDP; - h->daddr = INADDR_BROADCAST; + h->daddr = htonl(INADDR_BROADCAST); h->check = ip_fast_csum((unsigned char *) h, h->ihl); /* Construct UDP header */ @@ -730,8 +731,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d b->htype = dev->type; /* can cause undefined behavior */ } b->hlen = dev->addr_len; - b->your_ip = INADDR_NONE; - b->server_ip = INADDR_NONE; + b->your_ip = NONE; + b->server_ip = NONE; memcpy(b->hw_addr, dev->dev_addr, dev->addr_len); b->secs = htons(jiffies_diff / HZ); b->xid = d->xid; @@ -788,11 +789,11 @@ static void __init ic_do_bootp_ext(u8 *ext) switch (*ext++) { case 1: /* Subnet mask */ - if (ic_netmask == INADDR_NONE) + if (ic_netmask == NONE) memcpy(&ic_netmask, ext+1, 4); break; case 3: /* Default gateway */ - if (ic_gateway == INADDR_NONE) + if (ic_gateway == NONE) memcpy(&ic_gateway, ext+1, 4); break; case 6: /* DNS server */ @@ -800,7 +801,7 @@ static void __init ic_do_bootp_ext(u8 *ext) if (servers > CONF_NAMESERVERS_MAX) servers = CONF_NAMESERVERS_MAX; for (i = 0; i < servers; i++) { - if (ic_nameservers[i] == INADDR_NONE) + if (ic_nameservers[i] == NONE) memcpy(&ic_nameservers[i], ext+1+4*i, 4); } break; @@ -917,7 +918,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str #ifdef IPCONFIG_DHCP if (ic_proto_enabled & IC_USE_DHCP) { - u32 server_id = INADDR_NONE; + __be32 server_id = NONE; int mt = 0; ext = &b->exten[4]; @@ -949,7 +950,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* While in the process of accepting one offer, * ignore all others. */ - if (ic_myaddr != INADDR_NONE) + if (ic_myaddr != NONE) goto drop_unlock; /* Let's accept that offer. */ @@ -965,7 +966,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str * precedence over the bootp header one if * they are different. */ - if ((server_id != INADDR_NONE) && + if ((server_id != NONE) && (b->server_ip != server_id)) b->server_ip = ic_servaddr; break; @@ -979,8 +980,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str default: /* Urque. Forget it*/ - ic_myaddr = INADDR_NONE; - ic_servaddr = INADDR_NONE; + ic_myaddr = NONE; + ic_servaddr = NONE; goto drop_unlock; }; @@ -1004,9 +1005,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_dev = dev; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; - if (ic_gateway == INADDR_NONE && b->relay_ip) + if (ic_gateway == NONE && b->relay_ip) ic_gateway = b->relay_ip; - if (ic_nameservers[0] == INADDR_NONE) + if (ic_nameservers[0] == NONE) ic_nameservers[0] = ic_servaddr; ic_got_reply = IC_BOOTP; @@ -1150,7 +1151,7 @@ static int __init ic_dynamic(void) #endif if (!ic_got_reply) { - ic_myaddr = INADDR_NONE; + ic_myaddr = NONE; return -1; } @@ -1182,12 +1183,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "domain %s\n", ic_domain); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { - if (ic_nameservers[i] != INADDR_NONE) + if (ic_nameservers[i] != NONE) seq_printf(seq, "nameserver %u.%u.%u.%u\n", NIPQUAD(ic_nameservers[i])); } - if (ic_servaddr != INADDR_NONE) + if (ic_servaddr != NONE) seq_printf(seq, "bootserver %u.%u.%u.%u\n", NIPQUAD(ic_servaddr)); @@ -1213,9 +1214,9 @@ static struct file_operations pnp_seq_fops = { * need to have root_server_addr set _before_ IPConfig gets called as it * can override it. */ -u32 __init root_nfs_parse_addr(char *name) +__be32 __init root_nfs_parse_addr(char *name) { - u32 addr; + __be32 addr; int octets = 0; char *cp, *cq; @@ -1237,7 +1238,7 @@ u32 __init root_nfs_parse_addr(char *name) addr = in_aton(name); memmove(name, cp, strlen(cp) + 1); } else - addr = INADDR_NONE; + addr = NONE; return addr; } @@ -1248,7 +1249,7 @@ u32 __init root_nfs_parse_addr(char *name) static int __init ip_auto_config(void) { - u32 addr; + __be32 addr; #ifdef CONFIG_PROC_FS proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); @@ -1277,11 +1278,11 @@ static int __init ip_auto_config(void) * interfaces and no default was set), use BOOTP or RARP to get the * missing values. */ - if (ic_myaddr == INADDR_NONE || + if (ic_myaddr == NONE || #ifdef CONFIG_ROOT_NFS (MAJOR(ROOT_DEV) == UNNAMED_MAJOR - && root_server_addr == INADDR_NONE - && ic_servaddr == INADDR_NONE) || + && root_server_addr == NONE + && ic_servaddr == NONE) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC @@ -1334,7 +1335,7 @@ static int __init ip_auto_config(void) } addr = root_nfs_parse_addr(root_server_path); - if (root_server_addr == INADDR_NONE) + if (root_server_addr == NONE) root_server_addr = addr; /* @@ -1461,19 +1462,19 @@ static int __init ip_auto_config_setup(char *addrs) switch (num) { case 0: if ((ic_myaddr = in_aton(ip)) == INADDR_ANY) - ic_myaddr = INADDR_NONE; + ic_myaddr = NONE; break; case 1: if ((ic_servaddr = in_aton(ip)) == INADDR_ANY) - ic_servaddr = INADDR_NONE; + ic_servaddr = NONE; break; case 2: if ((ic_gateway = in_aton(ip)) == INADDR_ANY) - ic_gateway = INADDR_NONE; + ic_gateway = NONE; break; case 3: if ((ic_netmask = in_aton(ip)) == INADDR_ANY) - ic_netmask = INADDR_NONE; + ic_netmask = NONE; break; case 4: if ((dp = strchr(ip, '.'))) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 0c455652922..9d719d664e5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -118,7 +118,7 @@ #include <net/xfrm.h> #define HASH_SIZE 16 -#define HASH(addr) ((addr^(addr>>4))&0xF) +#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static int ipip_fb_tunnel_init(struct net_device *dev); static int ipip_tunnel_init(struct net_device *dev); @@ -134,7 +134,7 @@ static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunne static DEFINE_RWLOCK(ipip_lock); -static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) +static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local) { unsigned h0 = HASH(remote); unsigned h1 = HASH(local); @@ -160,8 +160,8 @@ static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) { - u32 remote = t->parms.iph.daddr; - u32 local = t->parms.iph.saddr; + __be32 remote = t->parms.iph.daddr; + __be32 local = t->parms.iph.saddr; unsigned h = 0; int prio = 0; @@ -203,8 +203,8 @@ static void ipip_tunnel_link(struct ip_tunnel *t) static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) { - u32 remote = parms->iph.daddr; - u32 local = parms->iph.saddr; + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; struct ip_tunnel *t, **tp, *nt; struct net_device *dev; unsigned h = 0; @@ -519,13 +519,13 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; - u16 df = tiph->frag_off; + __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ - u32 dst = tiph->daddr; + __be32 dst = tiph->daddr; int mtu; if (tunnel->recursion++) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 97cfa97c8ab..ecb5422ea23 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -105,7 +105,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); In this case data path is free of exclusive locks at all. */ -static kmem_cache_t *mrt_cachep __read_mostly; +static struct kmem_cache *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); @@ -1493,7 +1493,7 @@ static int pim_rcv(struct sk_buff * skb) if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || (pim->flags&PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && - (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) + csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; /* check if the inner packet is destined to mcast group */ diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index e7752334d29..6c40899aa16 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -80,10 +80,9 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) if (!pp->unregister_app) return -EOPNOTSUPP; - inc = kmalloc(sizeof(struct ip_vs_app), GFP_KERNEL); + inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); if (!inc) return -ENOMEM; - memcpy(inc, app, sizeof(*inc)); INIT_LIST_HEAD(&inc->p_list); INIT_LIST_HEAD(&inc->incs_list); inc->app = app; diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 8832eb517d5..8086787a2c5 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -44,7 +44,7 @@ static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ -static kmem_cache_t *ip_vs_conn_cachep __read_mostly; +static struct kmem_cache *ip_vs_conn_cachep __read_mostly; /* counter for current IPVS connections */ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 1445bb47fea..34257520a3a 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -536,9 +536,9 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, return NF_STOP; } -u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) +__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { - return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); + return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } static inline struct sk_buff * diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index f261616e460..9b933381ebb 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -221,10 +221,10 @@ static void update_defense_level(void) * Timer for checking the defense */ #define DEFENSE_TIMER_PERIOD 1*HZ -static void defense_work_handler(void *data); -static DECLARE_WORK(defense_work, defense_work_handler, NULL); +static void defense_work_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); -static void defense_work_handler(void *data) +static void defense_work_handler(struct work_struct *work) { update_defense_level(); if (atomic_read(&ip_vs_dropentry)) diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 6d398f10aa9..687c1de1146 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -200,7 +200,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, from = n_cp->vaddr; port = n_cp->vport; sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), - ntohs(port)&255, (ntohs(port)>>8)&255); + (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); /* diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 524751e031d..a4385a2180e 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -45,6 +45,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> +#include <linux/jiffies.h> /* for sysctl */ #include <linux/fs.h> diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 08990192b6e..fe1af5d079a 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -43,6 +43,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> +#include <linux/jiffies.h> /* for sysctl */ #include <linux/fs.h> diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index c4528b5c800..e844ddb82b9 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -118,13 +118,7 @@ void ip_vs_protocol_timeout_change(int flags) int * ip_vs_create_timeout_table(int *table, int size) { - int *t; - - t = kmalloc(size, GFP_ATOMIC); - if (t == NULL) - return NULL; - memcpy(t, table, size); - return t; + return kmemdup(table, size, GFP_ATOMIC); } diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bfe779e7459..16a9ebee2fe 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -84,7 +84,7 @@ tcp_conn_schedule(struct sk_buff *skb, } if (th->syn && - (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol, + (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, skb->nh.iph->daddr, th->dest))) { if (ip_vs_todrop()) { /* @@ -116,9 +116,9 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, __be16 oldport, __be16 newport) { tcph->check = - ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ htonl(0xFFFF), - newport, tcph->check)); + csum_fold(ip_vs_check_diff4(oldip, newip, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(tcph->check)))); } @@ -490,16 +490,18 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; static DEFINE_SPINLOCK(tcp_app_lock); -static inline __u16 tcp_app_hashkey(__u16 port) +static inline __u16 tcp_app_hashkey(__be16 port) { - return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK; + return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) + & TCP_APP_TAB_MASK; } static int tcp_register_app(struct ip_vs_app *inc) { struct ip_vs_app *i; - __u16 hash, port = inc->port; + __u16 hash; + __be16 port = inc->port; int ret = 0; hash = tcp_app_hashkey(port); diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 54aa7603591..03f0a414cfa 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -89,7 +89,7 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } - if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol, + if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, skb->nh.iph->daddr, uh->dest))) { if (ip_vs_todrop()) { /* @@ -121,11 +121,11 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, __be16 oldport, __be16 newport) { uhdr->check = - ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ htonl(0xFFFF), - newport, uhdr->check)); + csum_fold(ip_vs_check_diff4(oldip, newip, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); if (!uhdr->check) - uhdr->check = htonl(0xFFFF); + uhdr->check = CSUM_MANGLED_0; } static int @@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = htonl(0xFFFF); + udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); @@ -228,7 +228,7 @@ udp_dnat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = 0xFFFF; + udph->check = CSUM_MANGLED_0; (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; } return 1; @@ -282,16 +282,18 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) static struct list_head udp_apps[UDP_APP_TAB_SIZE]; static DEFINE_SPINLOCK(udp_app_lock); -static inline __u16 udp_app_hashkey(__u16 port) +static inline __u16 udp_app_hashkey(__be16 port) { - return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK; + return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) + & UDP_APP_TAB_MASK; } static int udp_register_app(struct ip_vs_app *inc) { struct ip_vs_app *i; - __u16 hash, port = inc->port; + __u16 hash; + __be16 port = inc->port; int ret = 0; hash = udp_app_hashkey(port); diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 91a075edd68..7ea2d981a93 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -657,7 +657,7 @@ static void sync_master_loop(void) if (stop_master_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* clean up the sync_buff queue */ @@ -714,7 +714,7 @@ static void sync_backup_loop(void) if (stop_backup_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* release the sending multicast socket */ @@ -826,7 +826,7 @@ static int fork_sync_thread(void *startup) if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { IP_VS_ERR("could not create sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -849,10 +849,12 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) ip_vs_sync_state |= state; if (state == IP_VS_STATE_MASTER) { - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, sizeof(ip_vs_master_mcast_ifn)); + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, + sizeof(ip_vs_master_mcast_ifn)); ip_vs_master_syncid = syncid; } else { - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, sizeof(ip_vs_backup_mcast_ifn)); + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, + sizeof(ip_vs_backup_mcast_ifn)); ip_vs_backup_syncid = syncid; } @@ -860,7 +862,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { IP_VS_ERR("could not create fork_sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -880,7 +882,8 @@ int stop_sync_thread(int state) IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_INFO("stopping sync thread %d ...\n", - (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); + (state == IP_VS_STATE_MASTER) ? + sync_master_pid : sync_backup_pid); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&stop_sync_wait, &wait); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index e2005c6810a..a68966059b5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -27,9 +27,7 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; -#endif + fl.mark = (*pskb)->mark; if (ip_route_output_key(&rt, &fl) != 0) return -1; @@ -164,17 +162,17 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) return 0; } -unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, +__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) { struct iphdr *iph = skb->nh.iph; - unsigned int csum = 0; + __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; - if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || + if ((protocol == 0 && !csum_fold(skb->csum)) || !csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - dataoff, protocol, skb->csum)) { diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d88c292f118..f6026d4ac42 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -6,7 +6,7 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER config NF_CONNTRACK_IPV4 - tristate "IPv4 support for new connection tracking (EXPERIMENTAL)" + tristate "IPv4 connection tracking support (required for NAT) (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed @@ -19,21 +19,18 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -# connection tracking, helpers and protocols -config IP_NF_CONNTRACK - tristate "Connection tracking (required for masq/NAT)" - ---help--- - Connection tracking keeps a record of what packets have passed - through your machine, in order to figure out how they are related - into connections. - - This is required to do Masquerading or other kinds of Network - Address Translation (except for Fast NAT). It can also be used to - enhance packet filtering (see `Connection state match support' - below). +config NF_CONNTRACK_PROC_COMPAT + bool "proc/sysctl compatibility with old connection tracking" + depends on NF_CONNTRACK_IPV4 + default y + help + This option enables /proc and sysctl compatibility with the old + layer 3 dependant connection tracking. This is needed to keep + old programs that have not been adapted to the new names working. - To compile it as a module, choose M here. If unsure, say N. + If unsure, say Y. +# connection tracking, helpers and protocols config IP_NF_CT_ACCT bool "Connection tracking flow accounting" depends on IP_NF_CONNTRACK @@ -315,20 +312,6 @@ config IP_NF_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/modules.txt>. If unsure, say `N'. -config IP_NF_MATCH_HASHLIMIT - tristate 'hashlimit match support' - depends on IP_NF_IPTABLES - help - This option adds a new iptables `hashlimit' match. - - As opposed to `limit', this match dynamically creates a hash table - of limit buckets, based on your selection of source/destination - ip addresses and/or ports. - - It enables you to express policies like `10kpps for any given - destination IP' or `500pps from any given source IP' with a single - IPtables rule. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" @@ -404,7 +387,7 @@ config IP_NF_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. -# NAT + specific targets +# NAT + specific targets: ip_conntrack config IP_NF_NAT tristate "Full NAT" depends on IP_NF_IPTABLES && IP_NF_CONNTRACK @@ -415,14 +398,30 @@ config IP_NF_NAT To compile it as a module, choose M here. If unsure, say N. +# NAT + specific targets: nf_conntrack +config NF_NAT + tristate "Full NAT" + depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 + help + The Full NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in iptables: see the man page for iptables(8). + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_NAT_NEEDED bool - depends on IP_NF_NAT != n + depends on IP_NF_NAT + default y + +config NF_NAT_NEEDED + bool + depends on NF_NAT default y config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - depends on IP_NF_NAT + depends on (NF_NAT || IP_NF_NAT) help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and @@ -434,7 +433,7 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" - depends on IP_NF_NAT + depends on (NF_NAT || IP_NF_NAT) help REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to @@ -445,7 +444,7 @@ config IP_NF_TARGET_REDIRECT config IP_NF_TARGET_NETMAP tristate "NETMAP target support" - depends on IP_NF_NAT + depends on (NF_NAT || IP_NF_NAT) help NETMAP is an implementation of static 1:1 NAT mapping of network addresses. It maps the network address part, while keeping the host @@ -456,7 +455,7 @@ config IP_NF_TARGET_NETMAP config IP_NF_TARGET_SAME tristate "SAME target support" - depends on IP_NF_NAT + depends on (NF_NAT || IP_NF_NAT) help This option adds a `SAME' target, which works like the standard SNAT target, but attempts to give clients the same IP for all connections. @@ -478,19 +477,52 @@ config IP_NF_NAT_SNMP_BASIC To compile it as a module, choose M here. If unsure, say N. +config NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_NAT + ---help--- + + This module implements an Application Layer Gateway (ALG) for + SNMP payloads. In conjunction with NAT, it allows a network + management system to access multiple private networks with + conflicting addresses. It works by modifying IP addresses + inside SNMP payloads to match IP-layer NAT mapping. + + This is the "basic" form of SNMP-ALG, as described in RFC 2962 + + To compile it as a module, choose M here. If unsure, say N. + +# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), +# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. +# From kconfig-language.txt: +# +# <expr> '&&' <expr> (6) +# +# (6) Returns the result of min(/expr/, /expr/). +config NF_NAT_PROTO_GRE + tristate + depends on NF_NAT && NF_CT_PROTO_GRE + +config IP_NF_NAT_FTP + tristate + depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT + default IP_NF_NAT && IP_NF_FTP + +config NF_NAT_FTP + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_FTP + config IP_NF_NAT_IRC tristate depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n default IP_NF_NAT if IP_NF_IRC=y default m if IP_NF_IRC=m -# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), -# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh. -config IP_NF_NAT_FTP +config NF_NAT_IRC tristate - depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n - default IP_NF_NAT if IP_NF_FTP=y - default m if IP_NF_FTP=m + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_IRC config IP_NF_NAT_TFTP tristate @@ -498,30 +530,56 @@ config IP_NF_NAT_TFTP default IP_NF_NAT if IP_NF_TFTP=y default m if IP_NF_TFTP=m +config NF_NAT_TFTP + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_TFTP + config IP_NF_NAT_AMANDA tristate depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config NF_NAT_AMANDA + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_AMANDA + config IP_NF_NAT_PPTP tristate depends on IP_NF_NAT!=n && IP_NF_PPTP!=n default IP_NF_NAT if IP_NF_PPTP=y default m if IP_NF_PPTP=m +config NF_NAT_PPTP + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_PPTP + select NF_NAT_PROTO_GRE + config IP_NF_NAT_H323 tristate depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n default IP_NF_NAT if IP_NF_H323=y default m if IP_NF_H323=m +config NF_NAT_H323 + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_H323 + config IP_NF_NAT_SIP tristate depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n default IP_NF_NAT if IP_NF_SIP=y default m if IP_NF_SIP=m +config NF_NAT_SIP + tristate + depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + default NF_NAT && NF_CONNTRACK_SIP + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 09aaed1a806..15e741aeb29 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -5,17 +5,23 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o +ifneq ($(CONFIG_NF_NAT),) +iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o +else iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o +endif ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o -ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ip_conntrack_helper_h323_asn1.o +ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o ip_nat_h323-objs := ip_nat_helper_h323.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o obj-$(CONFIG_IP_NF_NAT) += ip_nat.o +obj-$(CONFIG_NF_NAT) += nf_nat.o # conntrack netlink interface obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o @@ -34,7 +40,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o -# NAT helpers +# NAT helpers (ip_conntrack) obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o @@ -43,6 +49,19 @@ obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o +# NAT helpers (nf_conntrack) +obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o +obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o +obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o +obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o +obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o +obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o +obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o +obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o + +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -50,10 +69,10 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches -obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o @@ -89,6 +108,11 @@ obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o # objects for l3 independent conntrack nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) +ifeq ($(CONFIG_PROC_FS),y) +nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o +endif +endif # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0849f1cced1..9aa22398b3d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -358,6 +358,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct arpt_standard_target *t = (void *)arpt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { printk("arptables: loop hook %u pos %u %08X.\n", @@ -368,13 +369,20 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct arpt_entry) + if ((e->target_offset == sizeof(struct arpt_entry) && (strcmp(t->target.u.user.name, ARPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->arp)) { + && unconditional(&e->arp)) || visited) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last * big jump. */ @@ -404,6 +412,14 @@ static int mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, ARPT_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct arpt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } + /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -426,8 +442,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, static inline int standard_check(const struct arpt_entry_target *t, unsigned int max_offset) { - struct arpt_standard_target *targ = (void *)t; - /* Check standard info. */ if (t->u.target_size != ARPT_ALIGN(sizeof(struct arpt_standard_target))) { @@ -437,18 +451,6 @@ static inline int standard_check(const struct arpt_entry_target *t, return 0; } - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct arpt_entry)) { - duprintf("arpt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } - - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("arpt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } return 1; } @@ -466,7 +468,13 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i return -EINVAL; } + if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) + return -EINVAL; + t = arpt_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); @@ -633,7 +641,7 @@ static int translate_table(const char *name, if (ret != 0) { ARPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, &i); return ret; } diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 6c7383a8e42..ad246ba7790 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -92,6 +92,7 @@ static int help(struct sk_buff **pskb, char pbuf[sizeof("65535")], *tmp; u_int16_t port, len; int ret = NF_ACCEPT; + typeof(ip_nat_amanda_hook) ip_nat_amanda; /* Only look at packets from the Amanda server */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) @@ -161,9 +162,11 @@ static int help(struct sk_buff **pskb, exp->mask.dst.protonum = 0xFF; exp->mask.dst.u.tcp.port = htons(0xFFFF); - if (ip_nat_amanda_hook) - ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff, - len, exp); + /* RCU read locked by nf_hook_slow */ + ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook); + if (ip_nat_amanda) + ret = ip_nat_amanda(pskb, ctinfo, off - dataoff, + len, exp); else if (ip_conntrack_expect_related(exp) != 0) ret = NF_DROP; ip_conntrack_expect_put(exp); @@ -180,7 +183,7 @@ static struct ip_conntrack_helper amanda_helper = { .help = help, .name = "amanda", - .tuple = { .src = { .u = { __constant_htons(10080) } }, + .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } }, .dst = { .protonum = IPPROTO_UDP }, }, .mask = { .src = { .u = { 0xFFFF } }, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 143c4668538..8556a4f4f60 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -40,9 +40,6 @@ /* ip_conntrack_lock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> @@ -68,8 +65,8 @@ static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size __read_mostly = 0; int ip_conntrack_max __read_mostly; struct list_head *ip_conntrack_hash __read_mostly; -static kmem_cache_t *ip_conntrack_cachep __read_mostly; -static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); @@ -201,7 +198,6 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, /* ip_conntrack_expect helper functions */ void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) { - ASSERT_WRITE_LOCK(&ip_conntrack_lock); IP_NF_ASSERT(!timer_pending(&exp->timeout)); list_del(&exp->list); CONNTRACK_STAT_INC(expect_delete); @@ -225,22 +221,22 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) struct ip_conntrack_expect *i; list_for_each_entry(i, &ip_conntrack_expect_list, list) { - if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { - atomic_inc(&i->use); + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; - } } return NULL; } /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; read_lock_bh(&ip_conntrack_lock); i = __ip_conntrack_expect_find(tuple); + if (i) + atomic_inc(&i->use); read_unlock_bh(&ip_conntrack_lock); return i; @@ -294,7 +290,6 @@ static void clean_from_lists(struct ip_conntrack *ct) { DEBUGP("clean_from_lists(%p)\n", ct); - ASSERT_WRITE_LOCK(&ip_conntrack_lock); list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); @@ -373,7 +368,6 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); - ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { if (tuplehash_to_ctrack(h) != ignored_conntrack && ip_ct_tuple_equal(tuple, &h->tuple)) { diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 93dcf960662..0410c99caca 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -310,6 +310,7 @@ static int help(struct sk_buff **pskb, struct ip_conntrack_expect *exp; unsigned int i; int found = 0, ends_in_nl; + typeof(ip_nat_ftp_hook) ip_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED @@ -433,9 +434,10 @@ static int help(struct sk_buff **pskb, /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ - if (ip_nat_ftp_hook) - ret = ip_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype, - matchoff, matchlen, exp, &seq); + ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook); + if (ip_nat_ftp) + ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, + matchoff, matchlen, exp, &seq); else { /* Can't expect this? Best to drop packet now. */ if (ip_conntrack_expect_related(exp) != 0) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c index 7b7441202bf..aabfe1c0690 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c @@ -237,6 +237,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, u_int16_t rtp_port; struct ip_conntrack_expect *rtp_exp; struct ip_conntrack_expect *rtcp_exp; + typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp; /* Read RTP or RTCP address */ if (!get_h245_addr(*data, addr, &ip, &port) || @@ -279,11 +280,11 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtcp_exp->flags = 0; if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip && nat_rtp_rtcp_hook) { + ct->tuplehash[!dir].tuple.dst.ip && + (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) { /* NAT needed */ - ret = nat_rtp_rtcp_hook(pskb, ct, ctinfo, data, dataoff, - addr, port, rtp_port, rtp_exp, - rtcp_exp); + ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + addr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ rtp_exp->expectfn = NULL; rtcp_exp->expectfn = NULL; @@ -328,6 +329,7 @@ static int expect_t120(struct sk_buff **pskb, __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; + typeof(nat_t120_hook) nat_t120; /* Read T.120 address */ if (!get_h245_addr(*data, addr, &ip, &port) || @@ -350,10 +352,11 @@ static int expect_t120(struct sk_buff **pskb, exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */ if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip && nat_t120_hook) { + ct->tuplehash[!dir].tuple.dst.ip && + (nat_t120 = rcu_dereference(nat_t120_hook))) { /* NAT needed */ - ret = nat_t120_hook(pskb, ct, ctinfo, data, dataoff, addr, - port, exp); + ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr, + port, exp); } else { /* Conntrack only */ exp->expectfn = NULL; if (ip_conntrack_expect_related(exp) == 0) { @@ -651,6 +654,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; + typeof(nat_h245_hook) nat_h245; /* Read h245Address */ if (!get_h225_addr(*data, addr, &ip, &port) || @@ -673,10 +677,11 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, exp->flags = 0; if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip && nat_h245_hook) { + ct->tuplehash[!dir].tuple.dst.ip && + (nat_h245 = rcu_dereference(nat_h245_hook))) { /* NAT needed */ - ret = nat_h245_hook(pskb, ct, ctinfo, data, dataoff, addr, - port, exp); + ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr, + port, exp); } else { /* Conntrack only */ exp->expectfn = ip_conntrack_h245_expect; @@ -712,6 +717,7 @@ static int expect_callforwarding(struct sk_buff **pskb, __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; + typeof(nat_callforwarding_hook) nat_callforwarding; /* Read alternativeAddress */ if (!get_h225_addr(*data, addr, &ip, &port) || port == 0) @@ -759,10 +765,11 @@ static int expect_callforwarding(struct sk_buff **pskb, exp->flags = 0; if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip && nat_callforwarding_hook) { + ct->tuplehash[!dir].tuple.dst.ip && + (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) { /* Need NAT */ - ret = nat_callforwarding_hook(pskb, ct, ctinfo, data, dataoff, - addr, port, exp); + ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, + addr, port, exp); } else { /* Conntrack only */ exp->expectfn = ip_conntrack_q931_expect; @@ -793,6 +800,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, int i; __be32 ip; u_int16_t port; + typeof(set_h225_addr_hook) set_h225_addr; DEBUGP("ip_ct_q931: Setup\n"); @@ -803,8 +811,10 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, return -1; } + set_h225_addr = rcu_dereference(set_h225_addr_hook); + if ((setup->options & eSetup_UUIE_destCallSignalAddress) && - (set_h225_addr_hook) && + (set_h225_addr) && get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) && ip != ct->tuplehash[!dir].tuple.src.ip) { DEBUGP("ip_ct_q931: set destCallSignalAddress " @@ -812,17 +822,17 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, NIPQUAD(ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.src.ip), ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr_hook(pskb, data, dataoff, - &setup->destCallSignalAddress, - ct->tuplehash[!dir].tuple.src.ip, - ntohs(ct->tuplehash[!dir].tuple.src. - u.tcp.port)); + ret = set_h225_addr(pskb, data, dataoff, + &setup->destCallSignalAddress, + ct->tuplehash[!dir].tuple.src.ip, + ntohs(ct->tuplehash[!dir].tuple.src. + u.tcp.port)); if (ret < 0) return -1; } if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) && - (set_h225_addr_hook) && + (set_h225_addr) && get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port) && ip != ct->tuplehash[!dir].tuple.dst.ip) { DEBUGP("ip_ct_q931: set sourceCallSignalAddress " @@ -830,11 +840,11 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, NIPQUAD(ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr_hook(pskb, data, dataoff, - &setup->sourceCallSignalAddress, - ct->tuplehash[!dir].tuple.dst.ip, - ntohs(ct->tuplehash[!dir].tuple.dst. - u.tcp.port)); + ret = set_h225_addr(pskb, data, dataoff, + &setup->sourceCallSignalAddress, + ct->tuplehash[!dir].tuple.dst.ip, + ntohs(ct->tuplehash[!dir].tuple.dst. + u.tcp.port)); if (ret < 0) return -1; } @@ -1153,7 +1163,7 @@ static struct ip_conntrack_helper ip_conntrack_helper_q931 = { .me = THIS_MODULE, .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ , .timeout = 240, - .tuple = {.src = {.u = {__constant_htons(Q931_PORT)}}, + .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}}, .dst = {.protonum = IPPROTO_TCP}}, .mask = {.src = {.u = {0xFFFF}}, .dst = {.protonum = 0xFF}}, @@ -1231,6 +1241,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; + typeof(nat_q931_hook) nat_q931; /* Look for the first related address */ for (i = 0; i < count; i++) { @@ -1258,9 +1269,9 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */ - if (nat_q931_hook) { /* Need NAT */ - ret = nat_q931_hook(pskb, ct, ctinfo, data, addr, i, - port, exp); + nat_q931 = rcu_dereference(nat_q931_hook); + if (nat_q931) { /* Need NAT */ + ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp); } else { /* Conntrack only */ exp->expectfn = ip_conntrack_q931_expect; @@ -1288,11 +1299,14 @@ static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperRequest * grq) { + typeof(set_ras_addr_hook) set_ras_addr; + DEBUGP("ip_ct_ras: GRQ\n"); - if (set_ras_addr_hook) /* NATed */ - return set_ras_addr_hook(pskb, ct, ctinfo, data, - &grq->rasAddress, 1); + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr) /* NATed */ + return set_ras_addr(pskb, ct, ctinfo, data, + &grq->rasAddress, 1); return 0; } @@ -1362,6 +1376,7 @@ static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct, { struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int ret; + typeof(set_ras_addr_hook) set_ras_addr; DEBUGP("ip_ct_ras: RRQ\n"); @@ -1371,10 +1386,11 @@ static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct, if (ret < 0) return -1; - if (set_ras_addr_hook) { - ret = set_ras_addr_hook(pskb, ct, ctinfo, data, - rrq->rasAddress.item, - rrq->rasAddress.count); + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr) { + ret = set_ras_addr(pskb, ct, ctinfo, data, + rrq->rasAddress.item, + rrq->rasAddress.count); if (ret < 0) return -1; } @@ -1397,13 +1413,15 @@ static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret; struct ip_conntrack_expect *exp; + typeof(set_sig_addr_hook) set_sig_addr; DEBUGP("ip_ct_ras: RCF\n"); - if (set_sig_addr_hook) { - ret = set_sig_addr_hook(pskb, ct, ctinfo, data, - rcf->callSignalAddress.item, - rcf->callSignalAddress.count); + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + rcf->callSignalAddress.item, + rcf->callSignalAddress.count); if (ret < 0) return -1; } @@ -1417,7 +1435,7 @@ static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct, DEBUGP ("ip_ct_ras: set RAS connection timeout to %u seconds\n", info->timeout); - ip_ct_refresh_acct(ct, ctinfo, NULL, info->timeout * HZ); + ip_ct_refresh(ct, *pskb, info->timeout * HZ); /* Set expect timeout */ read_lock_bh(&ip_conntrack_lock); @@ -1448,13 +1466,15 @@ static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); int ret; + typeof(set_sig_addr_hook) set_sig_addr; DEBUGP("ip_ct_ras: URQ\n"); - if (set_sig_addr_hook) { - ret = set_sig_addr_hook(pskb, ct, ctinfo, data, - urq->callSignalAddress.item, - urq->callSignalAddress.count); + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + urq->callSignalAddress.item, + urq->callSignalAddress.count); if (ret < 0) return -1; } @@ -1465,7 +1485,7 @@ static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct, info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - ip_ct_refresh_acct(ct, ctinfo, NULL, 30 * HZ); + ip_ct_refresh(ct, *pskb, 30 * HZ); return 0; } @@ -1479,28 +1499,30 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); __be32 ip; u_int16_t port; + typeof(set_h225_addr_hook) set_h225_addr; DEBUGP("ip_ct_ras: ARQ\n"); + set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((arq->options & eAdmissionRequest_destCallSignalAddress) && get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) && ip == ct->tuplehash[dir].tuple.src.ip && - port == info->sig_port[dir] && set_h225_addr_hook) { + port == info->sig_port[dir] && set_h225_addr) { /* Answering ARQ */ - return set_h225_addr_hook(pskb, data, 0, - &arq->destCallSignalAddress, - ct->tuplehash[!dir].tuple.dst.ip, - info->sig_port[!dir]); + return set_h225_addr(pskb, data, 0, + &arq->destCallSignalAddress, + ct->tuplehash[!dir].tuple.dst.ip, + info->sig_port[!dir]); } if ((arq->options & eAdmissionRequest_srcCallSignalAddress) && get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) && - ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr_hook) { + ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) { /* Calling ARQ */ - return set_h225_addr_hook(pskb, data, 0, - &arq->srcCallSignalAddress, - ct->tuplehash[!dir].tuple.dst.ip, - port); + return set_h225_addr(pskb, data, 0, + &arq->srcCallSignalAddress, + ct->tuplehash[!dir].tuple.dst.ip, + port); } return 0; @@ -1516,6 +1538,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; + typeof(set_sig_addr_hook) set_sig_addr; DEBUGP("ip_ct_ras: ACF\n"); @@ -1523,10 +1546,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, return 0; if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */ - if (set_sig_addr_hook) - return set_sig_addr_hook(pskb, ct, ctinfo, data, - &acf->destCallSignalAddress, - 1); + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr) + return set_sig_addr(pskb, ct, ctinfo, data, + &acf->destCallSignalAddress, 1); return 0; } @@ -1566,11 +1589,14 @@ static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationRequest * lrq) { + typeof(set_ras_addr_hook) set_ras_addr; + DEBUGP("ip_ct_ras: LRQ\n"); - if (set_ras_addr_hook) - return set_ras_addr_hook(pskb, ct, ctinfo, data, - &lrq->replyAddress, 1); + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr) + return set_ras_addr(pskb, ct, ctinfo, data, + &lrq->replyAddress, 1); return 0; } @@ -1629,20 +1655,24 @@ static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct, unsigned char **data, InfoRequestResponse * irr) { int ret; + typeof(set_ras_addr_hook) set_ras_addr; + typeof(set_sig_addr_hook) set_sig_addr; DEBUGP("ip_ct_ras: IRR\n"); - if (set_ras_addr_hook) { - ret = set_ras_addr_hook(pskb, ct, ctinfo, data, - &irr->rasAddress, 1); + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr) { + ret = set_ras_addr(pskb, ct, ctinfo, data, + &irr->rasAddress, 1); if (ret < 0) return -1; } - if (set_sig_addr_hook) { - ret = set_sig_addr_hook(pskb, ct, ctinfo, data, - irr->callSignalAddress.item, - irr->callSignalAddress.count); + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + irr->callSignalAddress.item, + irr->callSignalAddress.count); if (ret < 0) return -1; } @@ -1746,7 +1776,7 @@ static struct ip_conntrack_helper ip_conntrack_helper_ras = { .me = THIS_MODULE, .max_expected = 32, .timeout = 240, - .tuple = {.src = {.u = {__constant_htons(RAS_PORT)}}, + .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}}, .dst = {.protonum = IPPROTO_UDP}}, .mask = {.src = {.u = {0xFFFE}}, .dst = {.protonum = 0xFF}}, diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index a2af5e0c7f9..4d19373bbf0 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -124,6 +124,8 @@ EXPORT_SYMBOL(pptp_msg_name); static void pptp_expectfn(struct ip_conntrack *ct, struct ip_conntrack_expect *exp) { + typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn; + DEBUGP("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ @@ -133,7 +135,9 @@ static void pptp_expectfn(struct ip_conntrack *ct, /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ - if (!ip_nat_pptp_hook_expectfn) { + rcu_read_lock(); + ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn); + if (!ip_nat_pptp_expectfn) { struct ip_conntrack_tuple inv_t; struct ip_conntrack_expect *exp_other; @@ -142,7 +146,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - exp_other = ip_conntrack_expect_find(&inv_t); + exp_other = ip_conntrack_expect_find_get(&inv_t); if (exp_other) { /* delete other expectation. */ DEBUGP("found\n"); @@ -153,8 +157,9 @@ static void pptp_expectfn(struct ip_conntrack *ct, } } else { /* we need more than simple inversion */ - ip_nat_pptp_hook_expectfn(ct, exp); + ip_nat_pptp_expectfn(ct, exp); } + rcu_read_unlock(); } static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) @@ -176,7 +181,7 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) ip_conntrack_put(sibling); return 1; } else { - exp = ip_conntrack_expect_find(t); + exp = ip_conntrack_expect_find_get(t); if (exp) { DEBUGP("unexpect_related of expect %p\n", exp); ip_conntrack_unexpect_related(exp); @@ -226,6 +231,7 @@ exp_gre(struct ip_conntrack *ct, { struct ip_conntrack_expect *exp_orig, *exp_reply; int ret = 1; + typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre; exp_orig = ip_conntrack_expect_alloc(ct); if (exp_orig == NULL) @@ -262,8 +268,9 @@ exp_gre(struct ip_conntrack *ct, exp_reply->tuple.dst.u.gre.key = peer_callid; exp_reply->tuple.dst.protonum = IPPROTO_GRE; - if (ip_nat_pptp_hook_exp_gre) - ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre); + if (ip_nat_pptp_exp_gre) + ip_nat_pptp_exp_gre(exp_orig, exp_reply); if (ip_conntrack_expect_related(exp_orig) != 0) goto out_put_both; if (ip_conntrack_expect_related(exp_reply) != 0) @@ -303,6 +310,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 cid = 0, pcid = 0; + typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound; msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); @@ -402,9 +410,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, goto invalid; } - if (ip_nat_pptp_hook_inbound) - return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, - pptpReq); + ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound); + if (ip_nat_pptp_inbound) + return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -427,6 +435,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 cid = 0, pcid = 0; + typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound; msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); @@ -492,9 +501,9 @@ pptp_outbound_pkt(struct sk_buff **pskb, goto invalid; } - if (ip_nat_pptp_hook_outbound) - return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, - pptpReq); + ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound); + if (ip_nat_pptp_outbound) + return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 75f7c3db161..91832eca410 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -114,6 +114,7 @@ static int help(struct sk_buff **pskb, u_int16_t dcc_port; int i, ret = NF_ACCEPT; char *addr_beg_p, *addr_end_p; + typeof(ip_nat_irc_hook) ip_nat_irc; DEBUGP("entered\n"); @@ -222,11 +223,12 @@ static int help(struct sk_buff **pskb, { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; - if (ip_nat_irc_hook) - ret = ip_nat_irc_hook(pskb, ctinfo, - addr_beg_p - ib_ptr, - addr_end_p - addr_beg_p, - exp); + ip_nat_irc = rcu_dereference(ip_nat_irc_hook); + if (ip_nat_irc) + ret = ip_nat_irc(pskb, ctinfo, + addr_beg_p - ib_ptr, + addr_end_p - addr_beg_p, + exp); else if (ip_conntrack_expect_related(exp) != 0) ret = NF_DROP; ip_conntrack_expect_put(exp); diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 262d0d44ec1..5fcf91d617c 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -153,6 +153,7 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) return ret; nfattr_failure: + ip_conntrack_proto_put(proto); return -1; } @@ -319,8 +320,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, } else if (events & (IPCT_NEW | IPCT_RELATED)) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; - /* dump everything */ - events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; @@ -355,28 +354,35 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nfattr_failure; NFA_NEST_END(skb, nest_parms); - - /* NAT stuff is now a status flag */ - if ((events & IPCT_STATUS || events & IPCT_NATINFO) - && ctnetlink_dump_status(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_REFRESH - && ctnetlink_dump_timeout(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_PROTOINFO - && ctnetlink_dump_protoinfo(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_HELPINFO - && ctnetlink_dump_helpinfo(skb, ct) < 0) - goto nfattr_failure; - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) - goto nfattr_failure; + if (events & IPCT_DESTROY) { + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + } else { + if (ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; - if (events & IPCT_MARK - && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; + if (ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + + if ((events & IPCT_HELPER || ct->helper) + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if ((events & IPCT_MARK || ct->mark) + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + + if (events & IPCT_COUNTER_FILLING && + (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) + goto nfattr_failure; + } nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); @@ -742,7 +748,6 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, ip_conntrack_put(ct); return -ENOMEM; } - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, 1, ct); @@ -945,9 +950,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[], ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; - err = ctnetlink_change_status(ct, cda); - if (err < 0) - goto err; + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + } if (cda[CTA_PROTOINFO-1]) { err = ctnetlink_change_protoinfo(ct, cda); @@ -1256,7 +1263,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find(&tuple); + exp = ip_conntrack_expect_find_get(&tuple); if (!exp) return -ENOENT; @@ -1272,8 +1279,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) goto out; - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; - + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1, exp); @@ -1310,7 +1316,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find(&tuple); + exp = ip_conntrack_expect_find_get(&tuple); if (!exp) return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 5fe026f467d..ac1c49ef36a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -34,8 +34,6 @@ #include <linux/interrupt.h> static DEFINE_RWLOCK(ip_ct_gre_lock); -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index f4f75995a9e..3a26d63eed8 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -52,20 +52,56 @@ unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb, const char *dptr); EXPORT_SYMBOL_GPL(ip_nat_sdp_hook); -int ct_sip_get_info(const char *dptr, size_t dlen, - unsigned int *matchoff, - unsigned int *matchlen, - struct sip_header_nfo *hnfo); -EXPORT_SYMBOL_GPL(ct_sip_get_info); - - static int digits_len(const char *dptr, const char *limit, int *shift); static int epaddr_len(const char *dptr, const char *limit, int *shift); static int skp_digits_len(const char *dptr, const char *limit, int *shift); static int skp_epaddr_len(const char *dptr, const char *limit, int *shift); -struct sip_header_nfo ct_sip_hdrs[] = { - { /* Via header */ +struct sip_header_nfo { + const char *lname; + const char *sname; + const char *ln_str; + size_t lnlen; + size_t snlen; + size_t ln_strlen; + int case_sensitive; + int (*match_len)(const char *, const char *, int *); +}; + +static struct sip_header_nfo ct_sip_hdrs[] = { + [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */ + .lname = "sip:", + .lnlen = sizeof("sip:") - 1, + .ln_str = ":", + .ln_strlen = sizeof(":") - 1, + .match_len = epaddr_len + }, + [POS_REQ_URI] = { /* SIP request URI */ + .lname = "sip:", + .lnlen = sizeof("sip:") - 1, + .ln_str = "@", + .ln_strlen = sizeof("@") - 1, + .match_len = epaddr_len + }, + [POS_FROM] = { /* SIP From header */ + .lname = "From:", + .lnlen = sizeof("From:") - 1, + .sname = "\r\nf:", + .snlen = sizeof("\r\nf:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len, + }, + [POS_TO] = { /* SIP To header */ + .lname = "To:", + .lnlen = sizeof("To:") - 1, + .sname = "\r\nt:", + .snlen = sizeof("\r\nt:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len, + }, + [POS_VIA] = { /* SIP Via header */ .lname = "Via:", .lnlen = sizeof("Via:") - 1, .sname = "\r\nv:", @@ -74,7 +110,7 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof("UDP ") - 1, .match_len = epaddr_len, }, - { /* Contact header */ + [POS_CONTACT] = { /* SIP Contact header */ .lname = "Contact:", .lnlen = sizeof("Contact:") - 1, .sname = "\r\nm:", @@ -83,7 +119,7 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof("sip:") - 1, .match_len = skp_epaddr_len }, - { /* Content length header */ + [POS_CONTENT] = { /* SIP Content length header */ .lname = "Content-Length:", .lnlen = sizeof("Content-Length:") - 1, .sname = "\r\nl:", @@ -92,7 +128,8 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof(":") - 1, .match_len = skp_digits_len }, - { /* SDP media info */ + [POS_MEDIA] = { /* SDP media info */ + .case_sensitive = 1, .lname = "\nm=", .lnlen = sizeof("\nm=") - 1, .sname = "\rm=", @@ -101,7 +138,8 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof("audio ") - 1, .match_len = digits_len }, - { /* SDP owner address*/ + [POS_OWNER] = { /* SDP owner address*/ + .case_sensitive = 1, .lname = "\no=", .lnlen = sizeof("\no=") - 1, .sname = "\ro=", @@ -110,7 +148,8 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof("IN IP4 ") - 1, .match_len = epaddr_len }, - { /* SDP connection info */ + [POS_CONNECTION] = { /* SDP connection info */ + .case_sensitive = 1, .lname = "\nc=", .lnlen = sizeof("\nc=") - 1, .sname = "\rc=", @@ -119,16 +158,8 @@ struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof("IN IP4 ") - 1, .match_len = epaddr_len }, - { /* Requests headers */ - .lname = "sip:", - .lnlen = sizeof("sip:") - 1, - .sname = "sip:", - .snlen = sizeof("sip:") - 1, /* yes, i know.. ;) */ - .ln_str = "@", - .ln_strlen = sizeof("@") - 1, - .match_len = epaddr_len - }, - { /* SDP version header */ + [POS_SDP_HEADER] = { /* SDP version header */ + .case_sensitive = 1, .lname = "\nv=", .lnlen = sizeof("\nv=") - 1, .sname = "\rv=", @@ -138,7 +169,6 @@ struct sip_header_nfo ct_sip_hdrs[] = { .match_len = digits_len } }; -EXPORT_SYMBOL_GPL(ct_sip_hdrs); /* get line lenght until first CR or LF seen. */ int ct_sip_lnlen(const char *line, const char *limit) @@ -159,13 +189,19 @@ EXPORT_SYMBOL_GPL(ct_sip_lnlen); /* Linear string search, case sensitive. */ const char *ct_sip_search(const char *needle, const char *haystack, - size_t needle_len, size_t haystack_len) + size_t needle_len, size_t haystack_len, + int case_sensitive) { const char *limit = haystack + (haystack_len - needle_len); while (haystack <= limit) { - if (memcmp(haystack, needle, needle_len) == 0) - return haystack; + if (case_sensitive) { + if (strncmp(haystack, needle, needle_len) == 0) + return haystack; + } else { + if (strnicmp(haystack, needle, needle_len) == 0) + return haystack; + } haystack++; } return NULL; @@ -263,8 +299,9 @@ static int skp_epaddr_len(const char *dptr, const char *limit, int *shift) int ct_sip_get_info(const char *dptr, size_t dlen, unsigned int *matchoff, unsigned int *matchlen, - struct sip_header_nfo *hnfo) + enum sip_header_pos pos) { + struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos]; const char *limit, *aux, *k = dptr; int shift = 0; @@ -272,12 +309,14 @@ int ct_sip_get_info(const char *dptr, size_t dlen, while (dptr <= limit) { if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) && - (strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { + (hnfo->sname == NULL || + strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { dptr++; continue; } aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen, - ct_sip_lnlen(dptr, limit)); + ct_sip_lnlen(dptr, limit), + hnfo->case_sensitive); if (!aux) { DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str, hnfo->lname); @@ -298,6 +337,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen, DEBUGP("%s header not found.\n", hnfo->lname); return 0; } +EXPORT_SYMBOL_GPL(ct_sip_get_info); static int set_expected_rtp(struct sk_buff **pskb, struct ip_conntrack *ct, @@ -308,6 +348,7 @@ static int set_expected_rtp(struct sk_buff **pskb, struct ip_conntrack_expect *exp; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); int ret; + typeof(ip_nat_sdp_hook) ip_nat_sdp; exp = ip_conntrack_expect_alloc(ct); if (exp == NULL) @@ -328,8 +369,9 @@ static int set_expected_rtp(struct sk_buff **pskb, exp->expectfn = NULL; exp->flags = 0; - if (ip_nat_sdp_hook) - ret = ip_nat_sdp_hook(pskb, ctinfo, exp, dptr); + ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook); + if (ip_nat_sdp) + ret = ip_nat_sdp(pskb, ctinfo, exp, dptr); else { if (ip_conntrack_expect_related(exp) != 0) ret = NF_DROP; @@ -351,6 +393,7 @@ static int sip_help(struct sk_buff **pskb, int matchoff, matchlen; __be32 ipaddr; u_int16_t port; + typeof(ip_nat_sip_hook) ip_nat_sip; /* No Data ? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); @@ -368,8 +411,9 @@ static int sip_help(struct sk_buff **pskb, goto out; } - if (ip_nat_sip_hook) { - if (!ip_nat_sip_hook(pskb, ctinfo, ct, &dptr)) { + ip_nat_sip = rcu_dereference(ip_nat_sip_hook); + if (ip_nat_sip) { + if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) { ret = NF_DROP; goto out; } @@ -389,7 +433,7 @@ static int sip_help(struct sk_buff **pskb, } /* Get ip and port address from SDP packet. */ if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen, - &ct_sip_hdrs[POS_CONNECTION]) > 0) { + POS_CONNECTION) > 0) { /* We'll drop only if there are parse problems. */ if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr, @@ -398,7 +442,7 @@ static int sip_help(struct sk_buff **pskb, goto out; } if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen, - &ct_sip_hdrs[POS_MEDIA]) > 0) { + POS_MEDIA) > 0) { port = simple_strtoul(dptr + matchoff, NULL, 10); if (port < 1024) { diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 02135756562..86efb544967 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -28,9 +28,6 @@ #include <net/ip.h> #include <net/route.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> @@ -139,7 +136,6 @@ static int ct_seq_show(struct seq_file *s, void *v) const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); struct ip_conntrack_protocol *proto; - ASSERT_READ_LOCK(&ip_conntrack_lock); IP_NF_ASSERT(conntrack); /* we only want to print DIR_ORIGINAL */ @@ -926,7 +922,7 @@ EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index fe0b634dd37..ef56de2eff0 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -50,6 +50,7 @@ static int tftp_help(struct sk_buff **pskb, struct tftphdr _tftph, *tfh; struct ip_conntrack_expect *exp; unsigned int ret = NF_ACCEPT; + typeof(ip_nat_tftp_hook) ip_nat_tftp; tfh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), @@ -81,8 +82,9 @@ static int tftp_help(struct sk_buff **pskb, DEBUGP("expect: "); DUMP_TUPLE(&exp->tuple); DUMP_TUPLE(&exp->mask); - if (ip_nat_tftp_hook) - ret = ip_nat_tftp_hook(pskb, ctinfo, exp); + ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook); + if (ip_nat_tftp) + ret = ip_nat_tftp(pskb, ctinfo, exp); else if (ip_conntrack_expect_related(exp) != 0) ret = NF_DROP; ip_conntrack_expect_put(exp); diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c index 3a888715bbf..85df1a9aed3 100644 --- a/net/ipv4/netfilter/ip_nat_amanda.c +++ b/net/ipv4/netfilter/ip_nat_amanda.c @@ -70,15 +70,14 @@ static unsigned int help(struct sk_buff **pskb, static void __exit ip_nat_amanda_fini(void) { - ip_nat_amanda_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); + rcu_assign_pointer(ip_nat_amanda_hook, NULL); + synchronize_rcu(); } static int __init ip_nat_amanda_init(void) { - BUG_ON(ip_nat_amanda_hook); - ip_nat_amanda_hook = help; + BUG_ON(rcu_dereference(ip_nat_amanda_hook)); + rcu_assign_pointer(ip_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 4b6260a9740..9d1a5175dcd 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -362,12 +362,10 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = nf_csum_update(~iph->saddr, target->src.ip, - iph->check); + nf_csum_replace4(&iph->check, iph->saddr, target->src.ip); iph->saddr = target->src.ip; } else { - iph->check = nf_csum_update(~iph->daddr, target->dst.ip, - iph->check); + nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip); iph->daddr = target->dst.ip; } return 1; diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index a71c233d811..913960e1380 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -156,15 +156,14 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb, static void __exit ip_nat_ftp_fini(void) { - ip_nat_ftp_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); + rcu_assign_pointer(ip_nat_ftp_hook, NULL); + synchronize_rcu(); } static int __init ip_nat_ftp_init(void) { - BUG_ON(ip_nat_ftp_hook); - ip_nat_ftp_hook = ip_nat_ftp; + BUG_ON(rcu_dereference(ip_nat_ftp_hook)); + rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 3bf85848055..ee80feb4b2a 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -188,10 +188,8 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, csum_partial((char *)tcph, datalen, 0)); } else - tcph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ htons(0xFFFF), - htons(datalen), - tcph->check, 1); + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldlen), htons(datalen), 1); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -264,12 +262,10 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, csum_partial((char *)udph, datalen, 0)); if (!udph->check) - udph->check = -1; + udph->check = CSUM_MANGLED_0; } else - udph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ htons(0xFFFF), - htons(datalen), - udph->check, 1); + nf_proto_csum_replace2(&udph->check, *pskb, + htons(oldlen), htons(datalen), 1); return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -307,14 +303,10 @@ sack_adjust(struct sk_buff *skb, ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = nf_proto_csum_update(skb, - ~sack->start_seq, - new_start_seq, - tcph->check, 0); - tcph->check = nf_proto_csum_update(skb, - ~sack->end_seq, - new_end_seq, - tcph->check, 0); + nf_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + nf_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -397,10 +389,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb, else newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, - tcph->check, 0); - tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, - tcph->check, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c index 4a7d34466ee..bdc99ef6159 100644 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ b/net/ipv4/netfilter/ip_nat_helper_h323.c @@ -563,25 +563,25 @@ static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct, /****************************************************************************/ static int __init init(void) { - BUG_ON(set_h245_addr_hook != NULL); - BUG_ON(set_h225_addr_hook != NULL); - BUG_ON(set_sig_addr_hook != NULL); - BUG_ON(set_ras_addr_hook != NULL); - BUG_ON(nat_rtp_rtcp_hook != NULL); - BUG_ON(nat_t120_hook != NULL); - BUG_ON(nat_h245_hook != NULL); - BUG_ON(nat_callforwarding_hook != NULL); - BUG_ON(nat_q931_hook != NULL); - - set_h245_addr_hook = set_h245_addr; - set_h225_addr_hook = set_h225_addr; - set_sig_addr_hook = set_sig_addr; - set_ras_addr_hook = set_ras_addr; - nat_rtp_rtcp_hook = nat_rtp_rtcp; - nat_t120_hook = nat_t120; - nat_h245_hook = nat_h245; - nat_callforwarding_hook = nat_callforwarding; - nat_q931_hook = nat_q931; + BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); + BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); + BUG_ON(rcu_dereference(nat_t120_hook) != NULL); + BUG_ON(rcu_dereference(nat_h245_hook) != NULL); + BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); + BUG_ON(rcu_dereference(nat_q931_hook) != NULL); + + rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); + rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); + rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); + rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); + rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); + rcu_assign_pointer(nat_t120_hook, nat_t120); + rcu_assign_pointer(nat_h245_hook, nat_h245); + rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); + rcu_assign_pointer(nat_q931_hook, nat_q931); DEBUGP("ip_nat_h323: init success\n"); return 0; @@ -590,16 +590,16 @@ static int __init init(void) /****************************************************************************/ static void __exit fini(void) { - set_h245_addr_hook = NULL; - set_h225_addr_hook = NULL; - set_sig_addr_hook = NULL; - set_ras_addr_hook = NULL; - nat_rtp_rtcp_hook = NULL; - nat_t120_hook = NULL; - nat_h245_hook = NULL; - nat_callforwarding_hook = NULL; - nat_q931_hook = NULL; - synchronize_net(); + rcu_assign_pointer(set_h245_addr_hook, NULL); + rcu_assign_pointer(set_h225_addr_hook, NULL); + rcu_assign_pointer(set_sig_addr_hook, NULL); + rcu_assign_pointer(set_ras_addr_hook, NULL); + rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); + rcu_assign_pointer(nat_t120_hook, NULL); + rcu_assign_pointer(nat_h245_hook, NULL); + rcu_assign_pointer(nat_callforwarding_hook, NULL); + rcu_assign_pointer(nat_q931_hook, NULL); + synchronize_rcu(); } /****************************************************************************/ diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 329fdcd7d70..ec957bbb536 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -101,7 +101,7 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&t); - other_exp = ip_conntrack_expect_find(&t); + other_exp = ip_conntrack_expect_find_get(&t); if (other_exp) { ip_conntrack_unexpect_related(other_exp); ip_conntrack_expect_put(other_exp); @@ -315,17 +315,17 @@ static int __init ip_nat_helper_pptp_init(void) if (ret < 0) return ret; - BUG_ON(ip_nat_pptp_hook_outbound); - ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound)); + rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt); - BUG_ON(ip_nat_pptp_hook_inbound); - ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound)); + rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt); - BUG_ON(ip_nat_pptp_hook_exp_gre); - ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre)); + rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre); - BUG_ON(ip_nat_pptp_hook_expectfn); - ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn)); + rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected); printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); return 0; @@ -335,14 +335,13 @@ static void __exit ip_nat_helper_pptp_fini(void) { DEBUGP("cleanup_module\n" ); - ip_nat_pptp_hook_expectfn = NULL; - ip_nat_pptp_hook_exp_gre = NULL; - ip_nat_pptp_hook_inbound = NULL; - ip_nat_pptp_hook_outbound = NULL; + rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL); + rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL); + rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL); + rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL); + synchronize_rcu(); ip_nat_proto_gre_fini(); - /* Make sure noone calls it, meanwhile */ - synchronize_net(); printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); } diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c index a767123e082..feb26b48f1d 100644 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ b/net/ipv4/netfilter/ip_nat_irc.c @@ -98,15 +98,14 @@ static unsigned int help(struct sk_buff **pskb, static void __exit ip_nat_irc_fini(void) { - ip_nat_irc_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); + rcu_assign_pointer(ip_nat_irc_hook, NULL); + synchronize_rcu(); } static int __init ip_nat_irc_init(void) { - BUG_ON(ip_nat_irc_hook); - ip_nat_irc_hook = help; + BUG_ON(rcu_dereference(ip_nat_irc_hook)); + rcu_assign_pointer(ip_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index bf91f9312b3..95810202d84 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -129,11 +129,9 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = - nf_proto_csum_update(*pskb, - ~*(gre_key(greh)), - tuple->dst.u.gre.key, - *(gre_csum(greh)), 0); + nf_proto_csum_replace4(gre_csum(greh), *pskb, + *(gre_key(greh)), + tuple->dst.u.gre.key, 0); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 3f6efc13ac7..fb716edd5bc 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -24,8 +24,8 @@ icmp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - return (tuple->src.u.icmp.id >= min->icmp.id - && tuple->src.u.icmp.id <= max->icmp.id); + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); } static int @@ -66,10 +66,8 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - hdr->checksum = nf_proto_csum_update(*pskb, - hdr->un.echo.id ^ htons(0xFFFF), - tuple->src.u.icmp.id, - hdr->checksum, 0); + nf_proto_csum_replace2(&hdr->checksum, *pskb, + hdr->un.echo.id, tuple->src.u.icmp.id, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 12deb13b93b..b586d18b3fb 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -129,9 +129,8 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); - hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport, - hdr->check, 0); + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 4bbec7730d1..5ced0877b32 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -115,13 +115,10 @@ udp_manip_pkt(struct sk_buff **pskb, } if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { - hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, - hdr->check, 1); - hdr->check = nf_proto_csum_update(*pskb, - *portptr ^ htons(0xFFFF), newport, - hdr->check, 0); + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0); if (!hdr->check) - hdr->check = -1; + hdr->check = CSUM_MANGLED_0; } *portptr = newport; return 1; diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c index 71fc2730a00..6223abc924f 100644 --- a/net/ipv4/netfilter/ip_nat_sip.c +++ b/net/ipv4/netfilter/ip_nat_sip.c @@ -29,27 +29,70 @@ MODULE_DESCRIPTION("SIP NAT helper"); #define DEBUGP(format, args...) #endif -extern struct sip_header_nfo ct_sip_hdrs[]; +struct addr_map { + struct { + char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int srclen, srciplen; + unsigned int dstlen, dstiplen; + } addr[IP_CT_DIR_MAX]; +}; + +static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map) +{ + struct ip_conntrack_tuple *t; + enum ip_conntrack_dir dir; + unsigned int n; + + for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { + t = &ct->tuplehash[dir].tuple; + + n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", + NIPQUAD(t->src.ip)); + map->addr[dir].srciplen = n; + n += sprintf(map->addr[dir].src + n, ":%u", + ntohs(t->src.u.udp.port)); + map->addr[dir].srclen = n; + + n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u", + NIPQUAD(t->dst.ip)); + map->addr[dir].dstiplen = n; + n += sprintf(map->addr[dir].dst + n, ":%u", + ntohs(t->dst.u.udp.port)); + map->addr[dir].dstlen = n; + } +} -static unsigned int mangle_sip_packet(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - struct ip_conntrack *ct, - const char **dptr, size_t dlen, - char *buffer, int bufflen, - struct sip_header_nfo *hnfo) +static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, const char **dptr, size_t dlen, + enum sip_header_pos pos, struct addr_map *map) { - unsigned int matchlen, matchoff; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff, addrlen; + char *addr; - if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, hnfo) <= 0) - return 0; + if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0) + return 1; + + if ((matchlen == map->addr[dir].srciplen || + matchlen == map->addr[dir].srclen) && + memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { + addr = map->addr[!dir].dst; + addrlen = map->addr[!dir].dstlen; + } else if ((matchlen == map->addr[dir].dstiplen || + matchlen == map->addr[dir].dstlen) && + memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) { + addr = map->addr[!dir].src; + addrlen = map->addr[!dir].srclen; + } else + return 1; if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo, - matchoff, matchlen, buffer, bufflen)) + matchoff, matchlen, addr, addrlen)) return 0; - - /* We need to reload this. Thanks Patrick. */ *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); return 1; + } static unsigned int ip_nat_sip(struct sk_buff **pskb, @@ -57,70 +100,61 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, struct ip_conntrack *ct, const char **dptr) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - unsigned int bufflen, dataoff; - __be32 ip; - __be16 port; + enum sip_header_pos pos; + struct addr_map map; + int dataoff, datalen; dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + datalen = (*pskb)->len - dataoff; + if (datalen < sizeof("SIP/2.0") - 1) + return NF_DROP; + + addr_map_init(ct, &map); + + /* Basic rules: requests and responses. */ + if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { + /* 10.2: Constructing the REGISTER Request: + * + * The "userinfo" and "@" components of the SIP URI MUST NOT + * be present. + */ + if (datalen >= sizeof("REGISTER") - 1 && + strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) + pos = POS_REG_REQ_URI; + else + pos = POS_REQ_URI; + + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) + return NF_DROP; + } + + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + return NF_DROP; + return NF_ACCEPT; +} - ip = ct->tuplehash[!dir].tuple.dst.ip; - port = ct->tuplehash[!dir].tuple.dst.u.udp.port; - bufflen = sprintf(buffer, "%u.%u.%u.%u:%u", NIPQUAD(ip), ntohs(port)); +static unsigned int mangle_sip_packet(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr, size_t dlen, + char *buffer, int bufflen, + enum sip_header_pos pos) +{ + unsigned int matchlen, matchoff; - /* short packet ? */ - if (((*pskb)->len - dataoff) < (sizeof("SIP/2.0") - 1)) + if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0) return 0; - /* Basic rules: requests and responses. */ - if (memcmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) == 0) { - const char *aux; - - if ((ctinfo) < IP_CT_IS_REPLY) { - mangle_sip_packet(pskb, ctinfo, ct, dptr, - (*pskb)->len - dataoff, - buffer, bufflen, - &ct_sip_hdrs[POS_CONTACT]); - return 1; - } + if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, buffer, bufflen)) + return 0; - if (!mangle_sip_packet(pskb, ctinfo, ct, dptr, - (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_VIA])) - return 0; - - /* This search should ignore case, but later.. */ - aux = ct_sip_search("CSeq:", *dptr, sizeof("CSeq:") - 1, - (*pskb)->len - dataoff); - if (!aux) - return 0; - - if (!ct_sip_search("REGISTER", aux, sizeof("REGISTER"), - ct_sip_lnlen(aux, *dptr + (*pskb)->len - dataoff))) - return 1; - - return mangle_sip_packet(pskb, ctinfo, ct, dptr, - (*pskb)->len - dataoff, - buffer, bufflen, - &ct_sip_hdrs[POS_CONTACT]); - } - if ((ctinfo) < IP_CT_IS_REPLY) { - if (!mangle_sip_packet(pskb, ctinfo, ct, dptr, - (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_VIA])) - return 0; - - /* Mangle Contact if exists only. - watch udp_nat_mangle()! */ - mangle_sip_packet(pskb, ctinfo, ct, dptr, (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_CONTACT]); - return 1; - } - /* This mangle requests headers. */ - return mangle_sip_packet(pskb, ctinfo, ct, dptr, - ct_sip_lnlen(*dptr, - *dptr + (*pskb)->len - dataoff), - buffer, bufflen, &ct_sip_hdrs[POS_REQ_HEADER]); + /* We need to reload this. Thanks Patrick. */ + *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + return 1; } static int mangle_content_len(struct sk_buff **pskb, @@ -136,7 +170,7 @@ static int mangle_content_len(struct sk_buff **pskb, /* Get actual SDP lenght */ if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, - &matchlen, &ct_sip_hdrs[POS_SDP_HEADER]) > 0) { + &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. */ @@ -144,7 +178,7 @@ static int mangle_content_len(struct sk_buff **pskb, /* Now, update SDP lenght */ if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, - &matchlen, &ct_sip_hdrs[POS_CONTENT]) > 0) { + &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); @@ -170,17 +204,17 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_OWNER])) + buffer, bufflen, POS_OWNER)) return 0; if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_CONNECTION])) + buffer, bufflen, POS_CONNECTION)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, - buffer, bufflen, &ct_sip_hdrs[POS_MEDIA])) + buffer, bufflen, POS_MEDIA)) return 0; return mangle_content_len(pskb, ctinfo, ct, dptr); @@ -230,18 +264,17 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, static void __exit fini(void) { - ip_nat_sip_hook = NULL; - ip_nat_sdp_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); + rcu_assign_pointer(ip_nat_sip_hook, NULL); + rcu_assign_pointer(ip_nat_sdp_hook, NULL); + synchronize_rcu(); } static int __init init(void) { - BUG_ON(ip_nat_sip_hook); - BUG_ON(ip_nat_sdp_hook); - ip_nat_sip_hook = ip_nat_sip; - ip_nat_sdp_hook = ip_nat_sdp; + BUG_ON(rcu_dereference(ip_nat_sip_hook)); + BUG_ON(rcu_dereference(ip_nat_sdp_hook)); + rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 168f45fa189..c3d9f3b090c 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -64,7 +64,7 @@ MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); #define SNMP_PORT 161 #define SNMP_TRAP_PORT 162 -#define NOCT1(n) (u_int8_t )((n) & 0xff) +#define NOCT1(n) (*(u8 *)n) static int debug; static DEFINE_SPINLOCK(snmp_lock); @@ -613,7 +613,7 @@ struct snmp_v1_trap static inline void mangle_address(unsigned char *begin, unsigned char *addr, const struct oct1_map *map, - u_int16_t *check); + __sum16 *check); struct snmp_cnv { unsigned int class; @@ -873,38 +873,24 @@ static unsigned char snmp_request_decode(struct asn1_ctx *ctx, * Fast checksum update for possibly oddly-aligned UDP byte, from the * code example in the draft. */ -static void fast_csum(unsigned char *csum, +static void fast_csum(__sum16 *csum, const unsigned char *optr, const unsigned char *nptr, - int odd) + int offset) { - long x, old, new; - - x = csum[0] * 256 + csum[1]; - - x =~ x & 0xFFFF; - - if (odd) old = optr[0] * 256; - else old = optr[0]; - - x -= old & 0xFFFF; - if (x <= 0) { - x--; - x &= 0xFFFF; - } - - if (odd) new = nptr[0] * 256; - else new = nptr[0]; - - x += new & 0xFFFF; - if (x & 0x10000) { - x++; - x &= 0xFFFF; + unsigned char s[4]; + + if (offset & 1) { + s[0] = s[2] = 0; + s[1] = ~*optr; + s[3] = *nptr; + } else { + s[1] = s[3] = 0; + s[0] = ~*optr; + s[2] = *nptr; } - - x =~ x & 0xFFFF; - csum[0] = x / 256; - csum[1] = x & 0xFF; + + *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); } /* @@ -915,9 +901,9 @@ static void fast_csum(unsigned char *csum, static inline void mangle_address(unsigned char *begin, unsigned char *addr, const struct oct1_map *map, - u_int16_t *check) + __sum16 *check) { - if (map->from == NOCT1(*addr)) { + if (map->from == NOCT1(addr)) { u_int32_t old; if (debug) @@ -927,11 +913,8 @@ static inline void mangle_address(unsigned char *begin, /* Update UDP checksum if being used */ if (*check) { - unsigned char odd = !((addr - begin) % 2); - - fast_csum((unsigned char *)check, - &map->from, &map->to, odd); - + fast_csum(check, + &map->from, &map->to, addr - begin); } if (debug) @@ -943,7 +926,7 @@ static inline void mangle_address(unsigned char *begin, static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap, const struct oct1_map *map, - u_int16_t *check) + __sum16 *check) { unsigned int cls, con, tag, len; unsigned char *end; @@ -1037,7 +1020,7 @@ static void hex_dump(unsigned char *buf, size_t len) static int snmp_parse_mangle(unsigned char *msg, u_int16_t len, const struct oct1_map *map, - u_int16_t *check) + __sum16 *check) { unsigned char *eoc, *end; unsigned int cls, con, tag, vers, pdutype; @@ -1223,12 +1206,12 @@ static int snmp_translate(struct ip_conntrack *ct, */ if (dir == IP_CT_DIR_ORIGINAL) { /* SNAT traps */ - map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip); - map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip); + map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip); + map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip); } else { /* DNAT replies */ - map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); - map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip); + map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); + map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip); } if (map.from == map.to) @@ -1294,11 +1277,11 @@ static struct ip_conntrack_helper snmp_helper = { .help = help, .name = "snmp", - .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } }, - .dst = { .protonum = IPPROTO_UDP }, + .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}}, + .dst = {.protonum = IPPROTO_UDP}, }, - .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFF }, + .mask = {.src = {.u = {0xFFFF}}, + .dst = {.protonum = 0xFF}, }, }; @@ -1309,11 +1292,11 @@ static struct ip_conntrack_helper snmp_trap_helper = { .help = help, .name = "snmp_trap", - .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } }, - .dst = { .protonum = IPPROTO_UDP }, + .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}}, + .dst = {.protonum = IPPROTO_UDP}, }, - .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFF }, + .mask = {.src = {.u = {0xFFFF}}, + .dst = {.protonum = 0xFF}, }, }; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index d85d2de5044..ad66328baa5 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -44,12 +44,6 @@ #define DEBUGP(format, args...) #endif -#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ - : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ - : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \ - : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ - : "*ERROR*"))) - #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c index 94a78015451..604793536fc 100644 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ b/net/ipv4/netfilter/ip_nat_tftp.c @@ -55,15 +55,14 @@ static unsigned int help(struct sk_buff **pskb, static void __exit ip_nat_tftp_fini(void) { - ip_nat_tftp_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); + rcu_assign_pointer(ip_nat_tftp_hook, NULL); + synchronize_rcu(); } static int __init ip_nat_tftp_init(void) { - BUG_ON(ip_nat_tftp_hook); - ip_nat_tftp_hook = help; + BUG_ON(rcu_dereference(ip_nat_tftp_hook)); + rcu_assign_pointer(ip_nat_tftp_hook, help); return 0; } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 7edad790478..cd520df4dcf 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -243,7 +243,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->data_len = data_len; pmsg->timestamp_sec = entry->skb->tstamp.off_sec; pmsg->timestamp_usec = entry->skb->tstamp.off_usec; - pmsg->mark = entry->skb->nfmark; + pmsg->mark = entry->skb->mark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; @@ -351,9 +351,10 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len < sizeof(*user_iph)) return 0; diff = v->data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, v->data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, v->data_len)) + return -ENOMEM; + } else if (diff > 0) { if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4b90927619b..09696f16aa9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -384,6 +384,7 @@ mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct ipt_standard_target *t = (void *)ipt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", @@ -394,13 +395,20 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ipt_entry) + if ((e->target_offset == sizeof(struct ipt_entry) && (strcmp(t->target.u.user.name, IPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ip)) { + && unconditional(&e->ip)) || visited) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last big jump. */ do { @@ -438,6 +446,13 @@ mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, IPT_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ipt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -470,28 +485,47 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) } static inline int -standard_check(const struct ipt_entry_target *t, - unsigned int max_offset) +check_entry(struct ipt_entry *e, const char *name) { - struct ipt_standard_target *targ = (void *)t; + struct ipt_entry_target *t; - /* Check standard info. */ - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct ipt_entry)) { - duprintf("ipt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; + if (!ip_checkentry(&e->ip)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; } - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("ipt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; + + if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) + return -EINVAL; + + t = ipt_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static inline int check_match(struct ipt_entry_match *m, const char *name, + const struct ipt_ip *ip, unsigned int hookmask) +{ + struct ipt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), + name, hookmask, ip->proto, + ip->invflags & IPT_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ip, match, m->data, + hookmask)) { + duprintf("ip_tables: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; } - return 1; + return ret; } static inline int -check_match(struct ipt_entry_match *m, +find_check_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, @@ -504,26 +538,15 @@ check_match(struct ipt_entry_match *m, m->u.user.revision), "ipt_%s", m->u.user.name); if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("find_check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); + ret = check_match(m, name, ip, hookmask); if (ret) goto err; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -531,10 +554,29 @@ err: return ret; } -static struct ipt_target ipt_standard_target; +static inline int check_target(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + struct ipt_target *target; + int ret; + + t = ipt_get_target(e); + target = t->u.kernel.target; + ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), + name, e->comefrom, e->ip.proto, + e->ip.invflags & IPT_INV_PROTO); + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, + t->data, e->comefrom)) { + duprintf("ip_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + ret = -EINVAL; + } + return ret; +} static inline int -check_entry(struct ipt_entry *e, const char *name, unsigned int size, +find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ipt_entry_target *t; @@ -542,13 +584,13 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, int ret; unsigned int j; - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); + ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, + e->comefrom, &j); if (ret != 0) goto cleanup_matches; @@ -558,32 +600,16 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); + ret = check_target(e, name); if (ret) goto err; - if (t->u.kernel.target == &ipt_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto err; - } - } else if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -718,11 +744,11 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + find_check_entry, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, &i); return ret; } @@ -1458,10 +1484,9 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } + ret = check_entry(e, name); + if (ret) + return ret; off = 0; entry_offset = (void *)e - (void *)base; @@ -1477,7 +1502,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } @@ -1513,36 +1539,10 @@ cleanup_matches: static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, - const struct ipt_ip *ip, unsigned int hookmask, int *i) + const struct ipt_ip *ip, unsigned int hookmask) { - struct ipt_entry_match *dm; - struct ipt_match *match; - int ret; - - dm = (struct ipt_entry_match *)*dstptr; - match = m->u.kernel.match; xt_compat_match_from_user(m, dstptr, size); - - ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (ret) - goto err; - - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; - -err: - module_put(m->u.kernel.match->me); - return ret; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1553,19 +1553,18 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct ipt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h, j; + int ret, h; ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); - j = 0; *dstptr += sizeof(struct compat_ipt_entry); ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, - name, &de->ip, de->comefrom, &j); + name, &de->ip, de->comefrom); if (ret) - goto cleanup_matches; + return ret; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; @@ -1578,34 +1577,18 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } + return ret; +} - t = ipt_get_target(de); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (ret) - goto err; +static inline int compat_check_entry(struct ipt_entry *e, const char *name) +{ + int ret; - ret = -EINVAL; - if (t->u.kernel.target == &ipt_standard_target) { - if (!standard_check(t, *size)) - goto err; - } else if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, de, target, - t->data, de->comefrom)) { - duprintf("ip_tables: compat: check failed for `%s'.\n", - t->u.kernel.target->name); - goto err; - } - ret = 0; - return ret; + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom); + if (ret) + return ret; -err: - module_put(t->u.kernel.target->me); -cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); - return ret; + return check_target(e, name); } static int @@ -1618,7 +1601,7 @@ translate_compat_table(const char *name, unsigned int *hook_entries, unsigned int *underflows) { - unsigned int i; + unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; unsigned int size; @@ -1636,21 +1619,21 @@ translate_compat_table(const char *name, } duprintf("translate_compat_table: size %u\n", info->size); - i = 0; + j = 0; xt_compat_lock(AF_INET); /* Walk through entries, checking offsets. */ ret = IPT_ENTRY_ITERATE(entry0, total_size, check_compat_entry_size_and_hooks, info, &size, entry0, entry0 + total_size, - hook_entries, underflows, &i, name); + hook_entries, underflows, &j, name); if (ret != 0) goto out_unlock; ret = -EINVAL; - if (i != number) { + if (j != number) { duprintf("translate_compat_table: %u not %u entries\n", - i, number); + j, number); goto out_unlock; } @@ -1696,6 +1679,11 @@ translate_compat_table(const char *name, if (!mark_source_chains(newinfo, valid_hooks, entry1)) goto free_newinfo; + ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, + name); + if (ret) + goto free_newinfo; + /* And one copy for every other CPU */ for_each_possible_cpu(i) if (newinfo->entries[i] && newinfo->entries[i] != entry1) @@ -1709,8 +1697,10 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: + IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j); return ret; out_unlock: + compat_flush_offsets(); xt_compat_unlock(AF_INET); goto out; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7a29d6e7baa..b1c11160b9d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -40,8 +40,6 @@ #define DEBUGP #endif -#define ASSERT_READ_LOCK(x) - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables target for CLUSTERIP"); @@ -123,7 +121,6 @@ __clusterip_config_find(__be32 clusterip) { struct list_head *pos; - ASSERT_READ_LOCK(&clusterip_lock); list_for_each(pos, &clusterip_configs) { struct clusterip_config *c = list_entry(pos, struct clusterip_config, list); @@ -170,7 +167,6 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; - char buffer[16]; c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) @@ -187,12 +183,17 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS - /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); - if (!c->pde) { - kfree(c); - return NULL; + { + char buffer[16]; + + /* create proc dir entry */ + sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir); + if (!c->pde) { + kfree(c); + return NULL; + } } c->pde->proc_fops = &clusterip_proc_fops; c->pde->data = c; @@ -205,6 +206,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, return c; } +#ifdef CONFIG_PROC_FS static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { @@ -232,6 +234,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) return 1; } +#endif static inline u_int32_t clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) @@ -444,6 +447,12 @@ checkentry(const char *tablename, cipinfo->config = config; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } + return 1; } @@ -457,6 +466,8 @@ static void destroy(const struct xt_target *target, void *targinfo) clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(target->family); } static struct ipt_target clusterip_tgt = { @@ -680,7 +691,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, { #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct clusterip_config *c = pde->data; unsigned long nodenum; @@ -737,8 +748,10 @@ static int __init ipt_clusterip_init(void) CLUSTERIP_VERSION); return 0; +#ifdef CONFIG_PROC_FS cleanup_hook: nf_unregister_hook(&cip_arp_ops); +#endif /* CONFIG_PROC_FS */ cleanup_target: ipt_unregister_target(&clusterip_tgt); return ret; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 1aa4517fbcd..b55d670a24d 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -28,17 +28,16 @@ static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { + __u8 oldtos; if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; iph = (*pskb)->nh.iph; oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF), - htons(iph->tos), iph->check); + nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } return 1; } @@ -72,10 +71,8 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - tcph->check = nf_proto_csum_update((*pskb), - oldval ^ htons(0xFFFF), - ((__be16 *)tcph)[6], - tcph->check, 0); + nf_proto_csum_replace2(&tcph->check, *pskb, + oldval, ((__be16 *)tcph)[6], 0); return 1; } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 7dc820df8bc..c96de16fefa 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -171,11 +171,15 @@ static void dump_packet(const struct nf_loginfo *info, } break; } - case IPPROTO_UDP: { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph, *uh; - /* Max length: 10 "PROTO=UDP " */ - printk("PROTO=UDP "); + if (ih->protocol == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP " ); + else /* Max length: 14 "PROTO=UDPLITE " */ + printk("PROTO=UDPLITE "); if (ntohs(ih->frag_off) & IP_OFFSET) break; @@ -341,6 +345,7 @@ static void dump_packet(const struct nf_loginfo *info, /* IP: 40+46+6+11+127 = 230 */ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ /* ESP: 10+max(25)+15 = 50 */ /* AH: 9+max(25)+15 = 49 */ @@ -425,13 +430,8 @@ ipt_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - if (loginfo->logflags & IPT_LOG_NFLOG) - nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, - "%s", loginfo->prefix); - else - ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, - loginfo->prefix); - + ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + loginfo->prefix); return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 3dbfcfac8a8..28b9233956b 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -2,7 +2,7 @@ (depending on route). */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -20,7 +20,11 @@ #include <net/checksum.h> #include <net/route.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_NF_NAT_NEEDED +#include <net/netfilter/nf_nat_rule.h> +#else #include <linux/netfilter_ipv4/ip_nat_rule.h> +#endif #include <linux/netfilter_ipv4/ip_tables.h> MODULE_LICENSE("GPL"); @@ -65,23 +69,33 @@ masquerade_target(struct sk_buff **pskb, const struct xt_target *target, const void *targinfo) { +#ifdef CONFIG_NF_NAT_NEEDED + struct nf_conn_nat *nat; +#endif struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - const struct ip_nat_multi_range_compat *mr; struct ip_nat_range newrange; + const struct ip_nat_multi_range_compat *mr; struct rtable *rt; __be32 newsrc; IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); ct = ip_conntrack_get(*pskb, &ctinfo); +#ifdef CONFIG_NF_NAT_NEEDED + nat = nfct_nat(ct); +#endif IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. */ +#ifdef CONFIG_NF_NAT_NEEDED + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) +#else if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) +#endif return NF_ACCEPT; mr = targinfo; @@ -93,7 +107,11 @@ masquerade_target(struct sk_buff **pskb, } write_lock_bh(&masq_lock); +#ifdef CONFIG_NF_NAT_NEEDED + nat->masq_index = out->ifindex; +#else ct->nat.masq_index = out->ifindex; +#endif write_unlock_bh(&masq_lock); /* Transfer from original range. */ @@ -109,10 +127,17 @@ masquerade_target(struct sk_buff **pskb, static inline int device_cmp(struct ip_conntrack *i, void *ifindex) { +#ifdef CONFIG_NF_NAT_NEEDED + struct nf_conn_nat *nat = nfct_nat(i); +#endif int ret; read_lock_bh(&masq_lock); +#ifdef CONFIG_NF_NAT_NEEDED + ret = (nat->masq_index == (int)(long)ifindex); +#else ret = (i->nat.masq_index == (int)(long)ifindex); +#endif read_unlock_bh(&masq_lock); return ret; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 58a88f22710..9390e90f2b2 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -15,7 +15,11 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_NF_NAT_NEEDED +#include <net/netfilter/nf_nat_rule.h> +#else #include <linux/netfilter_ipv4/ip_nat_rule.h> +#endif #define MODULENAME "NETMAP" MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index c0dcfe9d610..462eceb3a1b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -1,6 +1,6 @@ /* Redirect. Simple mapping which alters dst to a local IP address. */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,7 +18,11 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_NF_NAT_NEEDED +#include <net/netfilter/nf_nat_rule.h> +#else #include <linux/netfilter_ipv4/ip_nat_rule.h> +#endif MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index ad0312d0e4f..f0319e5ee43 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -76,7 +76,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); - nskb->nfmark = 0; + nskb->mark = 0; skb_init_secmark(nskb); tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); @@ -114,6 +114,14 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->window = 0; tcph->urg_ptr = 0; + /* Adjust TCP checksum */ + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), + nskb->nh.iph->saddr, + nskb->nh.iph->daddr, + csum_partial((char *)tcph, + sizeof(struct tcphdr), 0)); + /* Set DF, id = 0 */ nskb->nh.iph->frag_off = htons(IP_DF); nskb->nh.iph->id = 0; @@ -129,14 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (ip_route_me_harder(&nskb, addr_type)) goto free_nskb; - /* Adjust TCP checksum */ nskb->ip_summed = CHECKSUM_NONE; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), - nskb->nh.iph->saddr, - nskb->nh.iph->daddr, - csum_partial((char *)tcph, - sizeof(struct tcphdr), 0)); + /* Adjust IP TTL */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index b38b13328d7..3dcf2941133 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -34,7 +34,11 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_NF_NAT_NEEDED +#include <net/netfilter/nf_nat_rule.h> +#else #include <linux/netfilter_ipv4/ip_nat_rule.h> +#endif #include <linux/netfilter_ipv4/ipt_SAME.h> MODULE_LICENSE("GPL"); @@ -152,11 +156,17 @@ same_target(struct sk_buff **pskb, Here we calculate the index in same->iparray which holds the ipaddress we should use */ +#ifdef CONFIG_NF_NAT_NEEDED + tmpip = ntohl(t->src.u3.ip); + + if (!(same->info & IPT_SAME_NODST)) + tmpip += ntohl(t->dst.u3.ip); +#else tmpip = ntohl(t->src.ip); if (!(same->info & IPT_SAME_NODST)) tmpip += ntohl(t->dst.ip); - +#endif aindex = tmpip % same->ipnum; new_ip = htonl(same->iparray[aindex]); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 108b6b76311..93eb5c3c188 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -97,10 +97,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = nf_proto_csum_update(*pskb, - htons(oldmss)^htons(0xFFFF), - htons(newmss), - tcph->check, 0); + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldmss), htons(newmss), 0); return IPT_CONTINUE; } } @@ -126,28 +124,22 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = nf_proto_csum_update(*pskb, - htons(tcplen) ^ htons(0xFFFF), - htons(tcplen + TCPOLEN_MSS), - tcph->check, 1); + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt), - tcph->check, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = nf_proto_csum_update(*pskb, - oldval ^ htons(0xFFFF), - ((__be16 *)tcph)[6], - tcph->check, 0); + nf_proto_csum_replace2(&tcph->check, *pskb, + oldval, ((__be16 *)tcph)[6], 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF), - newtotlen, iph->check); + nf_csum_replace2(&iph->check, iph->tot_len, newtotlen); iph->tot_len = newtotlen; return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 83b80b3a5d2..18e74ac4d42 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -30,16 +30,15 @@ target(struct sk_buff **pskb, { const struct ipt_tos_target_info *tosinfo = targinfo; struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { + __u8 oldtos; if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; iph = (*pskb)->nh.iph; oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; - iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF), - htons(iph->tos), iph->check); + nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index ac9517d62af..fffe5ca82e9 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -54,9 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF), - htons(new_ttl << 8), - iph->check); + nf_csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); iph->ttl = new_ttl; } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 2b104ea54f4..dbd34783a64 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -239,7 +239,7 @@ static void ipt_ulog_packet(unsigned int hooknum, pm->data_len = copy_len; pm->timestamp_sec = skb->tstamp.off_sec; pm->timestamp_usec = skb->tstamp.off_usec; - pm->mark = skb->nfmark; + pm->mark = skb->mark; pm->hook = hooknum; if (prefix != NULL) strncpy(pm->prefix, prefix, sizeof(pm->prefix)); diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 126db44e71a..4db0e73c56f 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -401,7 +401,7 @@ static int recent_seq_open(struct inode *inode, struct file *file) static ssize_t recent_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index b91f3582359..af293988944 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -132,7 +132,7 @@ ipt_local_hook(unsigned int hook, unsigned int ret; u_int8_t tos; __be32 saddr, daddr; - unsigned long nfmark; + u_int32_t mark; /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) @@ -143,7 +143,7 @@ ipt_local_hook(unsigned int hook, } /* Save things which could affect route */ - nfmark = (*pskb)->nfmark; + mark = (*pskb)->mark; saddr = (*pskb)->nh.iph->saddr; daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; @@ -153,9 +153,7 @@ ipt_local_hook(unsigned int hook, if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr || (*pskb)->nh.iph->daddr != daddr -#ifdef CONFIG_IP_ROUTE_FWMARK - || (*pskb)->nfmark != nfmark -#endif + || (*pskb)->mark != mark || (*pskb)->nh.iph->tos != tos)) if (ip_route_me_harder(pskb, RTN_UNSPEC)) ret = NF_DROP; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 0af803df82b..471b638cede 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -27,7 +27,7 @@ #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> @@ -38,12 +38,10 @@ #define DEBUGP(format, args...) #endif -DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); - static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { - u_int32_t _addrs[2], *ap; + __be32 _addrs[2], *ap; ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), sizeof(u_int32_t) * 2, _addrs); if (ap == NULL) @@ -113,10 +111,12 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, return NF_ACCEPT; } -int nat_module_is_loaded = 0; +int nf_nat_module_is_loaded = 0; +EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded); + static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) { - if (nat_module_is_loaded) + if (nf_nat_module_is_loaded) return NF_CT_F_NAT; return NF_CT_F_BASIC; @@ -268,43 +268,59 @@ static struct nf_hook_ops ipv4_conntrack_ops[] = { }, }; -#ifdef CONFIG_SYSCTL -/* From nf_conntrack_proto_icmp.c */ -extern unsigned int nf_ct_icmp_timeout; -static struct ctl_table_header *nf_ct_ipv4_sysctl_header; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; -static ctl_table nf_ct_sysctl_table[] = { +static ctl_table ip_ct_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, - .procname = "nf_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, - .maxlen = sizeof(unsigned int), + .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, + .procname = "ip_conntrack_max", + .data = &nf_conntrack_max, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = &proc_dointvec, }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_netfilter_table[] = { { - .ctl_name = NET_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = nf_ct_sysctl_table, + .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, + .procname = "ip_conntrack_count", + .data = &nf_conntrack_count, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, + .procname = "ip_conntrack_buckets", + .data = &nf_conntrack_htable_size, + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, + .procname = "ip_conntrack_checksum", + .data = &nf_conntrack_checksum, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_net_table[] = { { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = nf_ct_netfilter_table, + .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, + .procname = "ip_conntrack_log_invalid", + .data = &nf_ct_log_invalid, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, }, - { .ctl_name = 0 } + { + .ctl_name = 0 + } }; -#endif +#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ /* Fast function for those who don't want to parse /proc (and I don't blame them). */ @@ -396,10 +412,8 @@ static int ipv4_nfattr_to_tuple(struct nfattr *tb[], if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) return -EINVAL; - t->src.u3.ip = - *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); - t->dst.u3.ip = - *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); return 0; } @@ -426,14 +440,15 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .tuple_to_nfattr = ipv4_tuple_to_nfattr, .nfattr_to_tuple = ipv4_nfattr_to_tuple, #endif +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, + .ctl_table = ip_ct_sysctl_table, +#endif .me = THIS_MODULE, }; -extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4; -extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; -extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp; - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); +MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); static int __init nf_conntrack_l3proto_ipv4_init(void) @@ -448,19 +463,19 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) return ret; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); if (ret < 0) { printk("nf_conntrack_ipv4: can't register tcp.\n"); goto cleanup_sockopt; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); if (ret < 0) { printk("nf_conntrack_ipv4: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); if (ret < 0) { printk("nf_conntrack_ipv4: can't register icmp.\n"); goto cleanup_udp; @@ -478,28 +493,24 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) printk("nf_conntrack_ipv4: can't register hooks.\n"); goto cleanup_ipv4; } -#ifdef CONFIG_SYSCTL - nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); - if (nf_ct_ipv4_sysctl_header == NULL) { - printk("nf_conntrack: can't register to sysctl.\n"); - ret = -ENOMEM; +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + ret = nf_conntrack_ipv4_compat_init(); + if (ret < 0) goto cleanup_hooks; - } #endif return ret; - -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) cleanup_hooks: - nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); + nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); #endif cleanup_ipv4: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); cleanup_icmp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); cleanup_udp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); cleanup_tcp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst); return ret; @@ -508,18 +519,16 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(nf_ct_ipv4_sysctl_header); +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + nf_conntrack_ipv4_compat_fini(); #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); nf_unregister_sockopt(&so_getorigdst); } module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); - -EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c new file mode 100644 index 00000000000..3b31bc64960 --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -0,0 +1,412 @@ +/* ip_conntrack proc compat - based on ip_conntrack_standalone.c + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/percpu.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_expect.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#ifdef CONFIG_NF_CT_ACCT +static unsigned int +seq_print_counters(struct seq_file *s, + const struct ip_conntrack_counter *counter) +{ + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)counter->packets, + (unsigned long long)counter->bytes); +} +#else +#define seq_print_counters(x, y) 0 +#endif + +struct ct_iter_state { + unsigned int bucket; +}; + +static struct list_head *ct_get_first(struct seq_file *seq) +{ + struct ct_iter_state *st = seq->private; + + for (st->bucket = 0; + st->bucket < nf_conntrack_htable_size; + st->bucket++) { + if (!list_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].next; + } + return NULL; +} + +static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +{ + struct ct_iter_state *st = seq->private; + + head = head->next; + while (head == &nf_conntrack_hash[st->bucket]) { + if (++st->bucket >= nf_conntrack_htable_size) + return NULL; + head = nf_conntrack_hash[st->bucket].next; + } + return head; +} + +static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +{ + struct list_head *head = ct_get_first(seq); + + if (head) + while (pos && (head = ct_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *ct_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_get_idx(seq, *pos); +} + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return ct_get_next(s, v); +} + +static void ct_seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&nf_conntrack_lock); +} + +static int ct_seq_show(struct seq_file *s, void *v) +{ + const struct nf_conntrack_tuple_hash *hash = v; + const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_l4proto *l4proto; + + NF_CT_ASSERT(ct); + + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + return 0; + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET) + return 0; + + l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num); + NF_CT_ASSERT(l3proto); + l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num, + ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + NF_CT_ASSERT(l4proto); + + if (seq_printf(s, "%-8s %u %ld ", + l4proto->name, + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) + return -ENOSPC; + + if (l3proto->print_conntrack(s, ct)) + return -ENOSPC; + + if (l4proto->print_conntrack(s, ct)) + return -ENOSPC; + + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, l4proto)) + return -ENOSPC; + + if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + return -ENOSPC; + + if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) + if (seq_printf(s, "[UNREPLIED] ")) + return -ENOSPC; + + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, l4proto)) + return -ENOSPC; + + if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + return -ENOSPC; + + if (test_bit(IPS_ASSURED_BIT, &ct->status)) + if (seq_printf(s, "[ASSURED] ")) + return -ENOSPC; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if (seq_printf(s, "mark=%u ", ct->mark)) + return -ENOSPC; +#endif + +#ifdef CONFIG_NF_CONNTRACK_SECMARK + if (seq_printf(s, "secmark=%u ", ct->secmark)) + return -ENOSPC; +#endif + + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + return -ENOSPC; + + return 0; +} + +static struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct ct_iter_state *st; + int ret; + + st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &ct_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + memset(st, 0, sizeof(struct ct_iter_state)); + return ret; +out_free: + kfree(st); + return ret; +} + +static struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* expects */ +static void *exp_seq_start(struct seq_file *s, loff_t *pos) +{ + struct list_head *e = &nf_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + read_lock_bh(&nf_conntrack_lock); + + if (list_empty(e)) + return NULL; + + for (i = 0; i <= *pos; i++) { + e = e->next; + if (e == &nf_conntrack_expect_list) + return NULL; + } + return e; +} + +static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct list_head *e = v; + + ++*pos; + e = e->next; + + if (e == &nf_conntrack_expect_list) + return NULL; + + return e; +} + +static void exp_seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&nf_conntrack_lock); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *exp = v; + + if (exp->tuple.src.l3num != AF_INET) + return 0; + + if (exp->timeout.function) + seq_printf(s, "%ld ", timer_pending(&exp->timeout) + ? (long)(exp->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + + seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); + + print_tuple(s, &exp->tuple, + __nf_ct_l3proto_find(exp->tuple.src.l3num), + __nf_ct_l4proto_find(exp->tuple.src.l3num, + exp->tuple.dst.protonum)); + return seq_putc(s, '\n'); +} + +static struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &exp_seq_ops); +} + +static struct file_operations ip_exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int cpu; + + for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct ip_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); + return 0; + } + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x \n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->error, + + st->expect_new, + st->expect_create, + st->expect_delete + ); + return 0; +} + +static struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_cpu_seq_ops); +} + +static struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + struct proc_dir_entry *proc, *proc_exp, *proc_stat; + + proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + if (!proc) + goto err1; + + proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + &ip_exp_file_ops); + if (!proc_exp) + goto err2; + + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + if (!proc_stat) + goto err3; + + proc_stat->proc_fops = &ct_cpu_seq_fops; + proc_stat->owner = THIS_MODULE; + + return 0; + +err3: + proc_net_remove("ip_conntrack_expect"); +err2: + proc_net_remove("ip_conntrack"); +err1: + return -ENOMEM; +} + +void __exit nf_conntrack_ipv4_compat_fini(void) +{ + remove_proc_entry("ip_conntrack", proc_net_stat); + proc_net_remove("ip_conntrack_expect"); + proc_net_remove("ip_conntrack"); +} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 790f00d500c..db9e7c45d3b 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -22,10 +22,10 @@ #include <net/checksum.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack_tuple.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> -unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; +static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk @@ -152,7 +152,7 @@ icmp_error_message(struct sk_buff *skb, struct icmphdr icmp; struct iphdr ip; } _in, *inside; - struct nf_conntrack_protocol *innerproto; + struct nf_conntrack_l4proto *innerproto; struct nf_conntrack_tuple_hash *h; int dataoff; @@ -170,7 +170,7 @@ icmp_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol); + innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, @@ -311,7 +311,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], tuple->dst.u.icmp.code = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) @@ -321,11 +321,42 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], } #endif -struct nf_conntrack_protocol nf_conntrack_protocol_icmp = +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *icmp_sysctl_header; +static struct ctl_table icmp_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, + .procname = "nf_conntrack_icmp_timeout", + .data = &nf_ct_icmp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table icmp_compat_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, + .procname = "ip_conntrack_icmp_timeout", + .data = &nf_ct_icmp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { - .list = { NULL, NULL }, .l3proto = PF_INET, - .proto = IPPROTO_ICMP, + .l4proto = IPPROTO_ICMP, .name = "icmp", .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, @@ -341,6 +372,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp = .tuple_to_nfattr = icmp_tuple_to_nfattr, .nfattr_to_tuple = icmp_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_header = &icmp_sysctl_header, + .ctl_table = icmp_sysctl_table, +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + .ctl_compat_table = icmp_compat_sysctl_table, +#endif +#endif }; - -EXPORT_SYMBOL(nf_conntrack_protocol_icmp); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_icmp); diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c new file mode 100644 index 00000000000..0f17098917b --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -0,0 +1,78 @@ +/* Amanda extension for TCP NAT alteration. + * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> + * based on a copy of HW's ip_nat_irc.c as well as other modules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/udp.h> + +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_amanda.h> + +MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); +MODULE_DESCRIPTION("Amanda NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_amanda"); + +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) +{ + char buffer[sizeof("65535")]; + u_int16_t port; + unsigned int ret; + + /* Connection comes from client. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_ORIGINAL; + + /* When you see the packet, we need to NAT it the same as the + * this one (ie. same IP: it will be TCP and master is UDP). */ + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + sprintf(buffer, "%u", port); + ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo, + matchoff, matchlen, + buffer, strlen(buffer)); + if (ret != NF_ACCEPT) + nf_conntrack_unexpect_related(exp); + return ret; +} + +static void __exit nf_nat_amanda_fini(void) +{ + rcu_assign_pointer(nf_nat_amanda_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_amanda_init(void) +{ + BUG_ON(rcu_dereference(nf_nat_amanda_hook)); + rcu_assign_pointer(nf_nat_amanda_hook, help); + return 0; +} + +module_init(nf_nat_amanda_init); +module_exit(nf_nat_amanda_fini); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c new file mode 100644 index 00000000000..86a92272b05 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -0,0 +1,647 @@ +/* NAT for netfilter; shared with compatibility layer. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/tcp.h> /* For tcp_prot in getorigdst */ +#include <linux/icmp.h> +#include <linux/udp.h> +#include <linux/jhash.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static DEFINE_RWLOCK(nf_nat_lock); + +static struct nf_conntrack_l3proto *l3proto = NULL; + +/* Calculated at init based on memory size */ +static unsigned int nf_nat_htable_size; + +static struct list_head *bysource; + +#define MAX_IP_NAT_PROTO 256 +static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]; + +static inline struct nf_nat_protocol * +__nf_nat_proto_find(u_int8_t protonum) +{ + return nf_nat_protos[protonum]; +} + +struct nf_nat_protocol * +nf_nat_proto_find_get(u_int8_t protonum) +{ + struct nf_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __nf_nat_proto_find(protonum); + if (!try_module_get(p->me)) + p = &nf_nat_unknown_protocol; + preempt_enable(); + + return p; +} +EXPORT_SYMBOL_GPL(nf_nat_proto_find_get); + +void +nf_nat_proto_put(struct nf_nat_protocol *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_nat_proto_put); + +/* We keep an extra hash for each conntrack, for fast searching. */ +static inline unsigned int +hash_by_src(const struct nf_conntrack_tuple *tuple) +{ + /* Original src, to ensure we map it consistently if poss. */ + return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all, + tuple->dst.protonum, 0) % nf_nat_htable_size; +} + +/* Noone using conntrack by the time this called. */ +static void nf_nat_cleanup_conntrack(struct nf_conn *conn) +{ + struct nf_conn_nat *nat; + if (!(conn->status & IPS_NAT_DONE_MASK)) + return; + + nat = nfct_nat(conn); + write_lock_bh(&nf_nat_lock); + list_del(&nat->info.bysource); + write_unlock_bh(&nf_nat_lock); +} + +/* Is this tuple already taken? (not by us) */ +int +nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + incoming ones. NAT means they don't have a fixed mapping, + so we invert the tuple and look for the incoming reply. + + We could keep a separate hash if this proves too slow. */ + struct nf_conntrack_tuple reply; + + nf_ct_invert_tuplepr(&reply, tuple); + return nf_conntrack_tuple_taken(&reply, ignored_conntrack); +} +EXPORT_SYMBOL(nf_nat_used_tuple); + +/* If we source map this tuple so reply looks like reply_tuple, will + * that meet the constraints of range. */ +static int +in_range(const struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range) +{ + struct nf_nat_protocol *proto; + + proto = __nf_nat_proto_find(tuple->dst.protonum); + /* If we are supposed to map IPs, then we must be in the + range specified, otherwise let this drag us onto a new src IP. */ + if (range->flags & IP_NAT_RANGE_MAP_IPS) { + if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || + ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) + return 0; + } + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || + proto->in_range(tuple, IP_NAT_MANIP_SRC, + &range->min, &range->max)) + return 1; + + return 0; +} + +static inline int +same_src(const struct nf_conn *ct, + const struct nf_conntrack_tuple *tuple) +{ + const struct nf_conntrack_tuple *t; + + t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + return (t->dst.protonum == tuple->dst.protonum && + t->src.u3.ip == tuple->src.u3.ip && + t->src.u.all == tuple->src.u.all); +} + +/* Only called for SRC manip */ +static int +find_appropriate_src(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *result, + const struct nf_nat_range *range) +{ + unsigned int h = hash_by_src(tuple); + struct nf_conn_nat *nat; + struct nf_conn *ct; + + read_lock_bh(&nf_nat_lock); + list_for_each_entry(nat, &bysource[h], info.bysource) { + ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data)); + if (same_src(ct, tuple)) { + /* Copy source part from reply tuple. */ + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + if (in_range(result, range)) { + read_unlock_bh(&nf_nat_lock); + return 1; + } + } + } + read_unlock_bh(&nf_nat_lock); + return 0; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + 1-65535, we don't do pro-rata allocation based on ports; we choose + the ip with the lowest src-ip/dst-ip/proto usage. +*/ +static void +find_best_ips_proto(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + const struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + __be32 *var_ipp; + /* Host order */ + u_int32_t minip, maxip, j; + + /* No IP mapping? Do nothing. */ + if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) + return; + + if (maniptype == IP_NAT_MANIP_SRC) + var_ipp = &tuple->src.u3.ip; + else + var_ipp = &tuple->dst.u3.ip; + + /* Fast path: only one choice. */ + if (range->min_ip == range->max_ip) { + *var_ipp = range->min_ip; + return; + } + + /* Hashing source and destination IPs gives a fairly even + * spread in practice (if there are a small number of IPs + * involved, there usually aren't that many connections + * anyway). The consistency means that servers see the same + * client coming from the same IP (some Internet Banking sites + * like this), even across reboots. */ + minip = ntohl(range->min_ip); + maxip = ntohl(range->max_ip); + j = jhash_2words((__force u32)tuple->src.u3.ip, + (__force u32)tuple->dst.u3.ip, 0); + *var_ipp = htonl(minip + j % (maxip - minip + 1)); +} + +/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING, + * we change the source to map into the range. For NF_IP_PRE_ROUTING + * and NF_IP_LOCAL_OUT, we change the destination to map into the + * range. It might not be possible to get a unique tuple, but we try. + * At worst (or if we race), we will end up with a final duplicate in + * __ip_conntrack_confirm and drop the packet. */ +static void +get_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig_tuple, + const struct nf_nat_range *range, + struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + struct nf_nat_protocol *proto; + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + and that same mapping gives a unique tuple within the given + range, use that. + + This is only required for source (ie. NAT/masq) mappings. + So far, we don't do local source mappings, so multiple + manips not an issue. */ + if (maniptype == IP_NAT_MANIP_SRC) { + if (find_appropriate_src(orig_tuple, tuple, range)) { + DEBUGP("get_unique_tuple: Found current src map\n"); + if (!nf_nat_used_tuple(tuple, ct)) + return; + } + } + + /* 2) Select the least-used IP/proto combination in the given + range. */ + *tuple = *orig_tuple; + find_best_ips_proto(tuple, range, ct, maniptype); + + /* 3) The per-protocol part of the manip is made to map into + the range to make a unique tuple. */ + + proto = nf_nat_proto_find_get(orig_tuple->dst.protonum); + + /* Only bother mapping if it's not already in range and unique */ + if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || + proto->in_range(tuple, maniptype, &range->min, &range->max)) && + !nf_nat_used_tuple(tuple, ct)) { + nf_nat_proto_put(proto); + return; + } + + /* Last change: get protocol to try to obtain unique tuple. */ + proto->unique_tuple(tuple, range, maniptype, ct); + + nf_nat_proto_put(proto); +} + +unsigned int +nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range *range, + unsigned int hooknum) +{ + struct nf_conntrack_tuple curr_tuple, new_tuple; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_info *info = &nat->info; + int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || + hooknum == NF_IP_POST_ROUTING || + hooknum == NF_IP_LOCAL_IN || + hooknum == NF_IP_LOCAL_OUT); + BUG_ON(nf_nat_initialized(ct, maniptype)); + + /* What we've got will look like inverse of reply. Normally + this is what is in the conntrack, except for prior + manipulations (future optimization: if num_manips == 0, + orig_tp = + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + nf_ct_invert_tuplepr(&curr_tuple, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); + + if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { + struct nf_conntrack_tuple reply; + + /* Alter conntrack table so will recognize replies. */ + nf_ct_invert_tuplepr(&reply, &new_tuple); + nf_conntrack_alter_reply(ct, &reply); + + /* Non-atomic: we own this at the moment. */ + if (maniptype == IP_NAT_MANIP_SRC) + ct->status |= IPS_SRC_NAT; + else + ct->status |= IPS_DST_NAT; + } + + /* Place in source hash if this is the first time. */ + if (have_to_hash) { + unsigned int srchash; + + srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + write_lock_bh(&nf_nat_lock); + list_add(&info->bysource, &bysource[srchash]); + write_unlock_bh(&nf_nat_lock); + } + + /* It's done. */ + if (maniptype == IP_NAT_MANIP_DST) + set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); + else + set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); + + return NF_ACCEPT; +} +EXPORT_SYMBOL(nf_nat_setup_info); + +/* Returns true if succeeded. */ +static int +manip_pkt(u_int16_t proto, + struct sk_buff **pskb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + struct nf_nat_protocol *p; + + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) + return 0; + + iph = (void *)(*pskb)->data + iphdroff; + + /* Manipulate protcol part. */ + p = nf_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + nf_nat_proto_put(p); + return 0; + } + nf_nat_proto_put(p); + + iph = (void *)(*pskb)->data + iphdroff; + + if (maniptype == IP_NAT_MANIP_SRC) { + nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return 1; +} + +/* Do packet manipulations according to nf_nat_setup_info. */ +unsigned int nf_nat_packet(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned long statusbit; + enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); + + if (mtype == IP_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + /* Non-atomic: these bits don't change. */ + if (ct->status & statusbit) { + struct nf_conntrack_tuple target; + + /* We are aiming to look like inverse of other direction. */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + + if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) + return NF_DROP; + } + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nf_nat_packet); + +/* Dir is direction ICMP is coming from (opposite to packet it contains) */ +int nf_nat_icmp_reply_translation(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + struct nf_conntrack_tuple inner, target; + int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned long statusbit; + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) + return 0; + + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + + /* We're actually going to mangle it beyond trivial checksum + adjustment, so make sure the current checksum is correct. */ + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; + + /* Must be RELATED */ + NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || + (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + + /* Redirects on non-null nats must be dropped, else they'll + start talking to each other without our translation, and be + confused... --RR */ + if (inside->icmp.type == ICMP_REDIRECT) { + /* If NAT isn't finished, assume it and drop. */ + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + + if (ct->status & IPS_NAT_MASK) + return 0; + } + + DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", + *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + + if (!nf_ct_get_tuple(*pskb, + (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr), + (*pskb)->nh.iph->ihl*4 + + sizeof(struct icmphdr) + inside->ip.ihl*4, + (u_int16_t)AF_INET, + inside->ip.protocol, + &inner, + l3proto, + __nf_ct_l4proto_find((u_int16_t)PF_INET, + inside->ip.protocol))) + return 0; + + /* Change inner back to look like incoming packet. We do the + opposite manip on this hook to normal, because it might not + pass all hooks (locally-generated ICMP). Consider incoming + packet: PREROUTING (DST manip), routing produces ICMP, goes + through POSTROUTING (which must correct the DST manip). */ + if (!manip_pkt(inside->ip.protocol, pskb, + (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, + !manip)) + return 0; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, 0)); + } + + /* Change outer to look the reply to an incoming packet + * (proto 0 means don't invert per-proto part). */ + if (manip == IP_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + if (!manip_pkt(0, pskb, 0, &target, manip)) + return 0; + } + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +/* Protocol registration. */ +int nf_nat_protocol_register(struct nf_nat_protocol *proto) +{ + int ret = 0; + + write_lock_bh(&nf_nat_lock); + if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { + ret = -EBUSY; + goto out; + } + nf_nat_protos[proto->protonum] = proto; + out: + write_unlock_bh(&nf_nat_lock); + return ret; +} +EXPORT_SYMBOL(nf_nat_protocol_register); + +/* Noone stores the protocol anywhere; simply delete it. */ +void nf_nat_protocol_unregister(struct nf_nat_protocol *proto) +{ + write_lock_bh(&nf_nat_lock); + nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol; + write_unlock_bh(&nf_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ + synchronize_net(); +} +EXPORT_SYMBOL(nf_nat_protocol_unregister); + +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +nf_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct nf_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} +EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range); + +int +nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); +#endif + +static int __init nf_nat_init(void) +{ + size_t i; + + /* Leave them the same for the moment. */ + nf_nat_htable_size = nf_conntrack_htable_size; + + /* One vmalloc for both hash tables */ + bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size); + if (!bysource) + return -ENOMEM; + + /* Sew in builtin protocols. */ + write_lock_bh(&nf_nat_lock); + for (i = 0; i < MAX_IP_NAT_PROTO; i++) + nf_nat_protos[i] = &nf_nat_unknown_protocol; + nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp; + nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp; + nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp; + write_unlock_bh(&nf_nat_lock); + + for (i = 0; i < nf_nat_htable_size; i++) { + INIT_LIST_HEAD(&bysource[i]); + } + + /* FIXME: Man, this is a hack. <SIGH> */ + NF_CT_ASSERT(nf_conntrack_destroyed == NULL); + nf_conntrack_destroyed = &nf_nat_cleanup_conntrack; + + /* Initialize fake conntrack so that NAT will skip it */ + nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + + l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __exit nf_nat_cleanup(void) +{ + nf_ct_iterate_cleanup(&clean_nat, NULL); + nf_conntrack_destroyed = NULL; + vfree(bysource); + nf_ct_l3proto_put(l3proto); +} + +MODULE_LICENSE("GPL"); + +module_init(nf_nat_init); +module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c new file mode 100644 index 00000000000..751b5980175 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -0,0 +1,179 @@ +/* FTP extension for TCP NAT alteration. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_ftp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); +MODULE_DESCRIPTION("ftp NAT helper"); +MODULE_ALIAS("ip_nat_ftp"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* FIXME: Time out? --RR */ + +static int +mangle_rfc959_packet(struct sk_buff **pskb, + __be32 newip, + u_int16_t port, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u32 *seq) +{ + char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; + + sprintf(buffer, "%u,%u,%u,%u,%u,%u", + NIPQUAD(newip), port>>8, port&0xFF); + + DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + + *seq += strlen(buffer) - matchlen; + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + matchlen, buffer, strlen(buffer)); +} + +/* |1|132.235.1.2|6275| */ +static int +mangle_eprt_packet(struct sk_buff **pskb, + __be32 newip, + u_int16_t port, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u32 *seq) +{ + char buffer[sizeof("|1|255.255.255.255|65535|")]; + + sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); + + DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + + *seq += strlen(buffer) - matchlen; + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + matchlen, buffer, strlen(buffer)); +} + +/* |1|132.235.1.2|6275| */ +static int +mangle_epsv_packet(struct sk_buff **pskb, + __be32 newip, + u_int16_t port, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u32 *seq) +{ + char buffer[sizeof("|||65535|")]; + + sprintf(buffer, "|||%u|", port); + + DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + + *seq += strlen(buffer) - matchlen; + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + matchlen, buffer, strlen(buffer)); +} + +static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, + unsigned int, unsigned int, struct nf_conn *, + enum ip_conntrack_info, u32 *seq) += { + [NF_CT_FTP_PORT] = mangle_rfc959_packet, + [NF_CT_FTP_PASV] = mangle_rfc959_packet, + [NF_CT_FTP_EPRT] = mangle_eprt_packet, + [NF_CT_FTP_EPSV] = mangle_epsv_packet +}; + +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int nf_nat_ftp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp, + u32 *seq) +{ + __be32 newip; + u_int16_t port; + int dir = CTINFO2DIR(ctinfo); + struct nf_conn *ct = exp->master; + + DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); + + /* Connection will come from wherever this packet goes, hence !dir */ + newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = !dir; + + /* When you see the packet, we need to NAT it the same as the + * this one. */ + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo, + seq)) { + nf_conntrack_unexpect_related(exp); + return NF_DROP; + } + return NF_ACCEPT; +} + +static void __exit nf_nat_ftp_fini(void) +{ + rcu_assign_pointer(nf_nat_ftp_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_ftp_init(void) +{ + BUG_ON(rcu_dereference(nf_nat_ftp_hook)); + rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); + return 0; +} + +/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +static int warn_set(const char *val, struct kernel_param *kp) +{ + printk(KERN_INFO KBUILD_MODNAME + ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + return 0; +} +module_param_call(ports, warn_set, NULL, NULL, 0); + +module_init(nf_nat_ftp_init); +module_exit(nf_nat_ftp_fini); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c new file mode 100644 index 00000000000..fb9ab0114c2 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -0,0 +1,596 @@ +/* + * H.323 extension for NAT alteration. + * + * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * + * This source code is licensed under General Public License version 2. + * + * Based on the 'brute force' H.323 NAT module by + * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_h323.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/****************************************************************************/ +static int set_addr(struct sk_buff **pskb, + unsigned char **data, int dataoff, + unsigned int addroff, __be32 ip, __be16 port) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo); + struct { + __be32 ip; + __be16 port; + } __attribute__ ((__packed__)) buf; + struct tcphdr _tcph, *th; + + buf.ip = ip; + buf.port = port; + addroff += dataoff; + + if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + if (net_ratelimit()) + printk("nf_nat_h323: nf_nat_mangle_tcp_packet" + " error\n"); + return -1; + } + + /* Relocate data pointer */ + th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, + sizeof(_tcph), &_tcph); + if (th == NULL) + return -1; + *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 + + th->doff * 4 + dataoff; + } else { + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + if (net_ratelimit()) + printk("nf_nat_h323: nf_nat_mangle_udp_packet" + " error\n"); + return -1; + } + /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy + * or pull everything in a linear buffer, so we can safely + * use the skb pointers now */ + *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 + + sizeof(struct udphdr); + } + + return 0; +} + +/****************************************************************************/ +static int set_h225_addr(struct sk_buff **pskb, + unsigned char **data, int dataoff, + TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 port) +{ + return set_addr(pskb, data, dataoff, taddr->ipAddress.ip, + addr->ip, port); +} + +/****************************************************************************/ +static int set_h245_addr(struct sk_buff **pskb, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 port) +{ + return set_addr(pskb, data, dataoff, + taddr->unicastAddress.iPAddress.network, + addr->ip, port); +} + +/****************************************************************************/ +static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, + TransportAddress *taddr, int count) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_conntrack_address addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) { + if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == info->sig_port[dir]) { + /* GW->GK */ + + /* Fix for Gnomemeeting */ + if (i > 0 && + get_h225_addr(ct, *data, &taddr[0], + &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) + i = 0; + + DEBUGP + ("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.dst. + ip), info->sig_port[!dir]); + return set_h225_addr(pskb, data, 0, &taddr[i], + &ct->tuplehash[!dir]. + tuple.dst.u3, + info->sig_port[!dir]); + } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && + port == info->sig_port[dir]) { + /* GK->GW */ + DEBUGP + ("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.src. + ip), info->sig_port[!dir]); + return set_h225_addr(pskb, data, 0, &taddr[i], + &ct->tuplehash[!dir]. + tuple.src.u3, + info->sig_port[!dir]); + } + } + } + + return 0; +} + +/****************************************************************************/ +static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, + TransportAddress *taddr, int count) +{ + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_conntrack_address addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && + addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == ct->tuplehash[dir].tuple.src.u.udp.port) { + DEBUGP("nf_nat_ras: set rasAddress " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(ip), ntohs(port), + NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp. + port)); + return set_h225_addr(pskb, data, 0, &taddr[i], + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple. + dst.u.udp.port); + } + } + + return 0; +} + +/****************************************************************************/ +static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + int i; + u_int16_t nated_port; + + /* Set expectations for NAT */ + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->expectfn = nf_nat_follow_master; + rtp_exp->dir = !dir; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->expectfn = nf_nat_follow_master; + rtcp_exp->dir = !dir; + + /* Lookup existing expects */ + for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) { + if (info->rtp_port[i][dir] == rtp_port) { + /* Expected */ + + /* Use allocated ports first. This will refresh + * the expects */ + rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir]; + rtcp_exp->tuple.dst.u.udp.port = + htons(ntohs(info->rtp_port[i][dir]) + 1); + break; + } else if (info->rtp_port[i][dir] == 0) { + /* Not expected */ + break; + } + } + + /* Run out of expectations */ + if (i >= H323_RTP_CHANNEL_MAX) { + if (net_ratelimit()) + printk("nf_nat_h323: out of expectations\n"); + return 0; + } + + /* Try to get a pair of ports. */ + for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); + nated_port != 0; nated_port += 2) { + rtp_exp->tuple.dst.u.udp.port = htons(nated_port); + if (nf_conntrack_expect_related(rtp_exp) == 0) { + rtcp_exp->tuple.dst.u.udp.port = + htons(nated_port + 1); + if (nf_conntrack_expect_related(rtcp_exp) == 0) + break; + nf_conntrack_unexpect_related(rtp_exp); + } + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("nf_nat_h323: out of RTP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(pskb, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons((port & htons(1)) ? nated_port + 1 : + nated_port)) == 0) { + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + } else { + nf_conntrack_unexpect_related(rtp_exp); + nf_conntrack_unexpect_related(rtcp_exp); + return -1; + } + + /* Success */ + DEBUGP("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtp_exp->tuple.src.ip), + ntohs(rtp_exp->tuple.src.u.udp.port), + NIPQUAD(rtp_exp->tuple.dst.ip), + ntohs(rtp_exp->tuple.dst.u.udp.port)); + DEBUGP("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtcp_exp->tuple.src.ip), + ntohs(rtcp_exp->tuple.src.u.udp.port), + NIPQUAD(rtcp_exp->tuple.dst.ip), + ntohs(rtcp_exp->tuple.dst.u.udp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + exp->tuple.dst.u.tcp.port = htons(nated_port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("nf_nat_h323: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(pskb, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) < 0) { + nf_conntrack_unexpect_related(exp); + return -1; + } + + DEBUGP("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + exp->tuple.dst.u.tcp.port = htons(nated_port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(pskb, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + } else { + nf_conntrack_unexpect_related(exp); + return -1; + } + + DEBUGP("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/**************************************************************************** + * This conntrack expect function replaces nf_conntrack_q931_expect() + * which was set by nf_conntrack_h323.c. + ****************************************************************************/ +static void ip_nat_q931_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct ip_nat_range range; + + if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ + nf_nat_follow_master(new, this); + return; + } + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = this->saved_proto; + range.min_ip = range.max_ip = + new->master->tuplehash[!this->dir].tuple.src.u3.ip; + + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING); +} + +/****************************************************************************/ +static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + union nf_conntrack_address addr; + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_q931_expect; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + exp->tuple.dst.u.tcp.port = htons(nated_port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("nf_nat_ras: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(pskb, data, 0, &taddr[idx], + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + set_h225_addr_hook(pskb, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); + } + } else { + nf_conntrack_unexpect_related(exp); + return -1; + } + + /* Success */ + DEBUGP("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static void ip_nat_callforwarding_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = this->saved_proto; + range.min_ip = range.max_ip = this->saved_ip; + + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING); +} + +/****************************************************************************/ +static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port; + + /* Set expectations for NAT */ + exp->saved_ip = exp->tuple.dst.u3.ip; + exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_callforwarding_expect; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (nated_port = ntohs(port); nated_port != 0; nated_port++) { + exp->tuple.dst.u.tcp.port = htons(nated_port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (!set_h225_addr(pskb, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + nf_conntrack_unexpect_related(exp); + return -1; + } + + /* Success */ + DEBUGP("nf_nat_q931: expect Call Forwarding " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static int __init init(void) +{ + BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); + BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); + BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); + BUG_ON(rcu_dereference(nat_t120_hook) != NULL); + BUG_ON(rcu_dereference(nat_h245_hook) != NULL); + BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); + BUG_ON(rcu_dereference(nat_q931_hook) != NULL); + + rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); + rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); + rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); + rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); + rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); + rcu_assign_pointer(nat_t120_hook, nat_t120); + rcu_assign_pointer(nat_h245_hook, nat_h245); + rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); + rcu_assign_pointer(nat_q931_hook, nat_q931); + + DEBUGP("nf_nat_h323: init success\n"); + return 0; +} + +/****************************************************************************/ +static void __exit fini(void) +{ + rcu_assign_pointer(set_h245_addr_hook, NULL); + rcu_assign_pointer(set_h225_addr_hook, NULL); + rcu_assign_pointer(set_sig_addr_hook, NULL); + rcu_assign_pointer(set_ras_addr_hook, NULL); + rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); + rcu_assign_pointer(nat_t120_hook, NULL); + rcu_assign_pointer(nat_h245_hook, NULL); + rcu_assign_pointer(nat_callforwarding_hook, NULL); + rcu_assign_pointer(nat_q931_hook, NULL); + synchronize_rcu(); +} + +/****************************************************************************/ +module_init(init); +module_exit(fini); + +MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); +MODULE_DESCRIPTION("H.323 NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_h323"); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c new file mode 100644 index 00000000000..98fbfc84d18 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -0,0 +1,433 @@ +/* ip_nat_helper.c - generic support functions for NAT helpers + * + * (C) 2000-2002 Harald Welte <laforge@netfilter.org> + * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> + +#if 0 +#define DEBUGP printk +#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos); +#else +#define DEBUGP(format, args...) +#define DUMP_OFFSET(x) +#endif + +static DEFINE_SPINLOCK(nf_nat_seqofs_lock); + +/* Setup TCP sequence correction given this change at this sequence */ +static inline void +adjust_tcp_sequence(u32 seq, + int sizediff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir; + struct nf_nat_seq *this_way, *other_way; + struct nf_conn_nat *nat = nfct_nat(ct); + + DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n", + (*skb)->len, new_size); + + dir = CTINFO2DIR(ctinfo); + + this_way = &nat->info.seq[dir]; + other_way = &nat->info.seq[!dir]; + + DEBUGP("nf_nat_resize_packet: Seq_offset before: "); + DUMP_OFFSET(this_way); + + spin_lock_bh(&nf_nat_seqofs_lock); + + /* SYN adjust. If it's uninitialized, or this is after last + * correction, record it: we don't handle more than one + * adjustment in the window, but do deal with common case of a + * retransmit */ + if (this_way->offset_before == this_way->offset_after || + before(this_way->correction_pos, seq)) { + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += sizediff; + } + spin_unlock_bh(&nf_nat_seqofs_lock); + + DEBUGP("nf_nat_resize_packet: Seq_offset after: "); + DUMP_OFFSET(this_way); +} + +/* Frobs data inside this packet, which is linear. */ +static void mangle_contents(struct sk_buff *skb, + unsigned int dataoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + unsigned char *data; + + BUG_ON(skb_is_nonlinear(skb)); + data = (unsigned char *)skb->nh.iph + dataoff; + + /* move post-replacement */ + memmove(data + match_offset + rep_len, + data + match_offset + match_len, + skb->tail - (data + match_offset + match_len)); + + /* insert data from buffer */ + memcpy(data + match_offset, rep_buffer, rep_len); + + /* update skb info */ + if (rep_len > match_len) { + DEBUGP("nf_nat_mangle_packet: Extending packet by " + "%u from %u bytes\n", rep_len - match_len, + skb->len); + skb_put(skb, rep_len - match_len); + } else { + DEBUGP("nf_nat_mangle_packet: Shrinking packet from " + "%u from %u bytes\n", match_len - rep_len, + skb->len); + __skb_trim(skb, skb->len + rep_len - match_len); + } + + /* fix IP hdr checksum information */ + skb->nh.iph->tot_len = htons(skb->len); + ip_send_check(skb->nh.iph); +} + +/* Unusual, but possible case. */ +static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) +{ + struct sk_buff *nskb; + + if ((*pskb)->len + extra > 65535) + return 0; + + nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); + if (!nskb) + return 0; + + /* Transfer socket to new skb. */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; +} + +/* Generic function for mangling variable-length address changes inside + * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX + * command in FTP). + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * */ +int +nf_nat_mangle_tcp_packet(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + struct iphdr *iph; + struct tcphdr *tcph; + int oldlen, datalen; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(*pskb) && + !enlarge_skb(pskb, rep_len - match_len)) + return 0; + + SKB_LINEAR_ASSERT(*pskb); + + iph = (*pskb)->nh.iph; + tcph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; + mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, + match_offset, match_len, rep_buffer, rep_len); + + datalen = (*pskb)->len - iph->ihl*4; + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldlen), htons(datalen), 1); + + if (rep_len != match_len) { + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(tcph->seq), + (int)rep_len - (int)match_len, + ct, ctinfo); + /* Tell TCP window tracking about seq change */ + nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, + ct, CTINFO2DIR(ctinfo)); + } + return 1; +} +EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); + +/* Generic function for mangling variable-length address changes inside + * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX + * command in the Amanda protocol) + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * XXX - This function could be merged with nf_nat_mangle_tcp_packet which + * should be fairly easy to do. + */ +int +nf_nat_mangle_udp_packet(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + struct iphdr *iph; + struct udphdr *udph; + int datalen, oldlen; + + /* UDP helpers might accidentally mangle the wrong packet */ + iph = (*pskb)->nh.iph; + if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + + match_offset + match_len) + return 0; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(*pskb) && + !enlarge_skb(pskb, rep_len - match_len)) + return 0; + + iph = (*pskb)->nh.iph; + udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; + mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), + match_offset, match_len, rep_buffer, rep_len); + + /* update the length of the UDP packet */ + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); + + /* fix udp checksum if udp checksum was previously calculated */ + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + udph->check = 0; + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, IPPROTO_UDP, + csum_partial((char *)udph, + datalen, 0)); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } else + nf_proto_csum_replace2(&udph->check, *pskb, + htons(oldlen), htons(datalen), 1); + + return 1; +} +EXPORT_SYMBOL(nf_nat_mangle_udp_packet); + +/* Adjust one found SACK option including checksum correction */ +static void +sack_adjust(struct sk_buff *skb, + struct tcphdr *tcph, + unsigned int sackoff, + unsigned int sackend, + struct nf_nat_seq *natseq) +{ + while (sackoff < sackend) { + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; + + sack = (void *)skb->data + sackoff; + if (after(ntohl(sack->start_seq) - natseq->offset_before, + natseq->correction_pos)) + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); + else + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); + + if (after(ntohl(sack->end_seq) - natseq->offset_before, + natseq->correction_pos)) + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); + else + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); + + DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); + + nf_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + nf_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); + sack->start_seq = new_start_seq; + sack->end_seq = new_end_seq; + sackoff += sizeof(*sack); + } +} + +/* TCP SACK sequence number adjustment */ +static inline unsigned int +nf_nat_sack_adjust(struct sk_buff **pskb, + struct tcphdr *tcph, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dir, optoff, optend; + struct nf_conn_nat *nat = nfct_nat(ct); + + optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); + optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; + + if (!skb_make_writable(pskb, optend)) + return 0; + + dir = CTINFO2DIR(ctinfo); + + while (optoff < optend) { + /* Usually: option, length. */ + unsigned char *op = (*pskb)->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + /* no partial options */ + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_SACK && + op[1] >= 2+TCPOLEN_SACK_PERBLOCK && + ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) + sack_adjust(*pskb, tcph, optoff+2, + optoff+op[1], + &nat->info.seq[!dir]); + optoff += op[1]; + } + } + return 1; +} + +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ +int +nf_nat_seq_adjust(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct tcphdr *tcph; + int dir; + __be32 newseq, newack; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way, *other_way; + + dir = CTINFO2DIR(ctinfo); + + this_way = &nat->info.seq[dir]; + other_way = &nat->info.seq[!dir]; + + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + return 0; + + tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + if (after(ntohl(tcph->seq), this_way->correction_pos)) + newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + else + newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, + other_way->correction_pos)) + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + else + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); + + DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); + + tcph->seq = newseq; + tcph->ack_seq = newack; + + if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) + return 0; + + nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir); + + return 1; +} +EXPORT_SYMBOL(nf_nat_seq_adjust); + +/* Setup NAT on this expected conntrack so it follows master. */ +/* If we fail to get a free NAT slot, we'll get dropped on confirm */ +void nf_nat_follow_master(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); +} +EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c new file mode 100644 index 00000000000..9b8c0daea74 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -0,0 +1,101 @@ +/* IRC extension for TCP NAT alteration. + * + * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> + * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation + * based on a copy of RR's ip_nat_ftp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/tcp.h> +#include <linux/kernel.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_irc.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("IRC (DCC) NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_irc"); + +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) +{ + char buffer[sizeof("4294967296 65635")]; + u_int32_t ip; + u_int16_t port; + unsigned int ret; + + DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", + expect->seq, exp_irc_info->len, ntohl(tcph->seq)); + + /* Reply comes from server. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_REPLY; + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); + sprintf(buffer, "%u %u", ip, port); + DEBUGP("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", + buffer, NIPQUAD(ip), port); + + ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, + matchoff, matchlen, buffer, + strlen(buffer)); + if (ret != NF_ACCEPT) + nf_conntrack_unexpect_related(exp); + return ret; +} + +static void __exit nf_nat_irc_fini(void) +{ + rcu_assign_pointer(nf_nat_irc_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_irc_init(void) +{ + BUG_ON(rcu_dereference(nf_nat_irc_hook)); + rcu_assign_pointer(nf_nat_irc_hook, help); + return 0; +} + +/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ +static int warn_set(const char *val, struct kernel_param *kp) +{ + printk(KERN_INFO KBUILD_MODNAME + ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + return 0; +} +module_param_call(ports, warn_set, NULL, NULL, 0); + +module_init(nf_nat_irc_init); +module_exit(nf_nat_irc_fini); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c new file mode 100644 index 00000000000..0ae45b79a4e --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -0,0 +1,315 @@ +/* + * nf_nat_pptp.c + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + */ + +#include <linux/module.h> +#include <linux/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_pptp.h> + +#define NF_NAT_PPTP_VERSION "3.0" + +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); +MODULE_ALIAS("ip_nat_pptp"); + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conn *master = ct->master; + struct nf_conntrack_expect *other_exp; + struct nf_conntrack_tuple t; + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + struct ip_nat_range range; + + ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; + nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = nat_pptp_info->pns_call_id; + t.dst.u3.ip = master->tuplehash[exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = nat_pptp_info->pac_call_id; + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + NF_CT_DUMP_TUPLE(&t); + other_exp = nf_conntrack_expect_find_get(&t); + if (other_exp) { + nf_conntrack_unexpect_related(other_exp); + nf_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + if (exp->dir == IP_CT_DIR_ORIGINAL) { + range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.min = range.max = exp->saved_proto; + } + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + if (exp->dir == IP_CT_DIR_REPLY) { + range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.min = range.max = exp->saved_proto; + } + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_callid; + unsigned int cid_off; + + ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + new_callid = ct_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + + /* mangle packet */ + if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static void +pptp_exp_gre(struct nf_conntrack_expect *expect_orig, + struct nf_conntrack_expect *expect_reply) +{ + struct nf_conn *ct = expect_orig->master; + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + + ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation for PNS->PAC direction */ + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; + expect_orig->dir = IP_CT_DIR_ORIGINAL; + + /* alter expectation for PAC->PNS direction */ + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; + expect_reply->dir = IP_CT_DIR_REPLY; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; + + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + new_pcid = nat_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); + break; + case PPTP_IN_CALL_CONNECT: + pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + return NF_ACCEPT; + case PPTP_WAN_ERROR_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, disc.callID); + break; + case PPTP_SET_LINK_INFO: + pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); + break; + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + + if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static int __init nf_nat_helper_pptp_init(void) +{ + nf_nat_need_gre(); + + BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); + rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + + BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound)); + rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + + BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre)); + rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + + BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn)); + rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + return 0; +} + +static void __exit nf_nat_helper_pptp_fini(void) +{ + rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); + rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL); + rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL); + rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL); + synchronize_rcu(); +} + +module_init(nf_nat_helper_pptp_init); +module_exit(nf_nat_helper_pptp_fini); diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c new file mode 100644 index 00000000000..d3de579e09d --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -0,0 +1,179 @@ +/* + * nf_nat_proto_gre.c + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohs(key) >= ntohs(min->gre.key) && + ntohs(key) <= ntohs(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *conntrack) +{ + static u_int16_t key; + __be16 *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohs(range->min.gre.key); + range_size = ntohs(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htons(min + key % range_size); + if (!nf_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl * 4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *)greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype != IP_NAT_MANIP_DST) + return 1; + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... */ + nf_proto_csum_replace4(gre_csum(greh), *pskb, + *(gre_key(greh)), + tuple->dst.u.gre.key, 0); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pgreh->call_id = tuple->dst.u.gre.key; + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + } + return 1; +} + +static struct nf_nat_protocol gre __read_mostly = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = nf_nat_port_range_to_nfattr, + .nfattr_to_range = nf_nat_port_nfattr_to_range, +#endif +}; + +int __init nf_nat_proto_gre_init(void) +{ + return nf_nat_protocol_register(&gre); +} + +void __exit nf_nat_proto_gre_fini(void) +{ + nf_nat_protocol_unregister(&gre); +} + +module_init(nf_nat_proto_gre_init); +module_exit(nf_nat_proto_gre_fini); + +void nf_nat_need_gre(void) +{ + return; +} +EXPORT_SYMBOL_GPL(nf_nat_need_gre); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c new file mode 100644 index 00000000000..dcfd772972d --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -0,0 +1,86 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/ip.h> +#include <linux/icmp.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> + +static int +icmp_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static int +icmp_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xFFFF; + + for (i = 0; i < range_size; i++, id++) { + tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + + (id % range_size)); + if (!nf_nat_used_tuple(tuple, ct)) + return 1; + } + return 0; +} + +static int +icmp_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct icmphdr *hdr; + unsigned int hdroff = iphdroff + iph->ihl*4; + + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + return 0; + + hdr = (struct icmphdr *)((*pskb)->data + hdroff); + nf_proto_csum_replace2(&hdr->checksum, *pskb, + hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id = tuple->src.u.icmp.id; + return 1; +} + +struct nf_nat_protocol nf_nat_protocol_icmp = { + .name = "ICMP", + .protonum = IPPROTO_ICMP, + .me = THIS_MODULE, + .manip_pkt = icmp_manip_pkt, + .in_range = icmp_in_range, + .unique_tuple = icmp_unique_tuple, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = nf_nat_port_range_to_nfattr, + .nfattr_to_range = nf_nat_port_nfattr_to_range, +#endif +}; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c new file mode 100644 index 00000000000..7e26a7e9bee --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -0,0 +1,148 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/ip.h> +#include <linux/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_core.h> + +static int +tcp_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.tcp.port; + else + port = tuple->dst.u.tcp.port; + + return ntohs(port) >= ntohs(min->tcp.port) && + ntohs(port) <= ntohs(max->tcp.port); +} + +static int +tcp_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t port; + __be16 *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.tcp.port; + else + portptr = &tuple->dst.u.tcp.port; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + /* Map privileged onto privileged. */ + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.tcp.port); + range_size = ntohs(range->max.tcp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!nf_nat_used_tuple(tuple, ct)) + return 1; + } + return 0; +} + +static int +tcp_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct tcphdr *hdr; + unsigned int hdroff = iphdroff + iph->ihl*4; + __be32 oldip, newip; + __be16 *portptr, newport, oldport; + int hdrsize = 8; /* TCP connection tracking guarantees this much */ + + /* this could be a inner header returned in icmp packet; in such + cases we cannot update the checksum field since it is outside of + the 8 bytes of transport layer headers we are guaranteed */ + if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) + hdrsize = sizeof(struct tcphdr); + + if (!skb_make_writable(pskb, hdroff + hdrsize)) + return 0; + + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct tcphdr *)((*pskb)->data + hdroff); + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + newip = tuple->src.u3.ip; + newport = tuple->src.u.tcp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + newip = tuple->dst.u3.ip; + newport = tuple->dst.u.tcp.port; + portptr = &hdr->dest; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return 1; + + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); + return 1; +} + +struct nf_nat_protocol nf_nat_protocol_tcp = { + .name = "TCP", + .protonum = IPPROTO_TCP, + .me = THIS_MODULE, + .manip_pkt = tcp_manip_pkt, + .in_range = tcp_in_range, + .unique_tuple = tcp_unique_tuple, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = nf_nat_port_range_to_nfattr, + .nfattr_to_range = nf_nat_port_nfattr_to_range, +#endif +}; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c new file mode 100644 index 00000000000..ab0ce4c8699 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -0,0 +1,138 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/ip.h> +#include <linux/udp.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> + +static int +udp_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + if (maniptype == IP_NAT_MANIP_SRC) + port = tuple->src.u.udp.port; + else + port = tuple->dst.u.udp.port; + + return ntohs(port) >= ntohs(min->udp.port) && + ntohs(port) <= ntohs(max->udp.port); +} + +static int +udp_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t port; + __be16 *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) + portptr = &tuple->src.u.udp.port; + else + portptr = &tuple->dst.u.udp.port; + + /* If no range specified... */ + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == IP_NAT_MANIP_DST) + return 0; + + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr)<512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min.udp.port); + range_size = ntohs(range->max.udp.port) - min + 1; + } + + for (i = 0; i < range_size; i++, port++) { + *portptr = htons(min + port % range_size); + if (!nf_nat_used_tuple(tuple, ct)) + return 1; + } + return 0; +} + +static int +udp_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct udphdr *hdr; + unsigned int hdroff = iphdroff + iph->ihl*4; + __be32 oldip, newip; + __be16 *portptr, newport; + + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + return 0; + + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct udphdr *)((*pskb)->data + hdroff); + + if (maniptype == IP_NAT_MANIP_SRC) { + /* Get rid of src ip and src pt */ + oldip = iph->saddr; + newip = tuple->src.u3.ip; + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst ip and dst pt */ + oldip = iph->daddr; + newip = tuple->dst.u3.ip; + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, + 0); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + } + *portptr = newport; + return 1; +} + +struct nf_nat_protocol nf_nat_protocol_udp = { + .name = "UDP", + .protonum = IPPROTO_UDP, + .me = THIS_MODULE, + .manip_pkt = udp_manip_pkt, + .in_range = udp_in_range, + .unique_tuple = udp_unique_tuple, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = nf_nat_port_range_to_nfattr, + .nfattr_to_range = nf_nat_port_nfattr_to_range, +#endif +}; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c new file mode 100644 index 00000000000..f50d0203f9c --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -0,0 +1,54 @@ +/* The "unknown" protocol. This is what is used for protocols we + * don't understand. It's returned by ip_ct_find_proto(). + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/init.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> + +static int unknown_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type manip_type, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return 1; +} + +static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + /* Sorry: we can't help you; if it's not unique, we can't frob + anything. */ + return 0; +} + +static int +unknown_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + return 1; +} + +struct nf_nat_protocol nf_nat_unknown_protocol = { + .name = "unknown", + /* .me isn't set: getting a ref to this cannot fail. */ + .manip_pkt = unknown_manip_pkt, + .in_range = unknown_in_range, + .unique_tuple = unknown_unique_tuple, +}; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c new file mode 100644 index 00000000000..b868ee0195d --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -0,0 +1,343 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Everything about the rules for NAT. */ +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <net/checksum.h> +#include <net/route.h> +#include <linux/bitops.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_rule.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} nat_initial_table __initdata = { + .repl = { + .name = "nat", + .valid_hooks = NAT_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_IP_PRE_ROUTING] = 0, + [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, + .underflow = { + [NF_IP_PRE_ROUTING] = 0, + [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, + }, + .entries = { + /* PRE_ROUTING */ + { + .entry = { + .target_offset = sizeof(struct ipt_entry), + .next_offset = sizeof(struct ipt_standard), + }, + .target = { + .target = { + .u = { + .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), + }, + }, + .verdict = -NF_ACCEPT - 1, + }, + }, + /* POST_ROUTING */ + { + .entry = { + .target_offset = sizeof(struct ipt_entry), + .next_offset = sizeof(struct ipt_standard), + }, + .target = { + .target = { + .u = { + .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), + }, + }, + .verdict = -NF_ACCEPT - 1, + }, + }, + /* LOCAL_OUT */ + { + .entry = { + .target_offset = sizeof(struct ipt_entry), + .next_offset = sizeof(struct ipt_standard), + }, + .target = { + .target = { + .u = { + .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), + }, + }, + .verdict = -NF_ACCEPT - 1, + }, + }, + }, + /* ERROR */ + .term = { + .entry = { + .target_offset = sizeof(struct ipt_entry), + .next_offset = sizeof(struct ipt_error), + }, + .target = { + .target = { + .u = { + .user = { + .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)), + .name = IPT_ERROR_TARGET, + }, + }, + }, + .errorname = "ERROR", + }, + } +}; + +static struct ipt_table nat_table = { + .name = "nat", + .valid_hooks = NAT_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE, + .af = AF_INET, +}; + +/* Source NAT */ +static unsigned int ipt_snat_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + const struct nf_nat_multi_range_compat *mr = targinfo; + + NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); + + ct = nf_ct_get(*pskb, &ctinfo); + + /* Connection must be valid and new. */ + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + NF_CT_ASSERT(out); + + return nf_nat_setup_info(ct, &mr->range[0], hooknum); +} + +/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ +static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) +{ + static int warned = 0; + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; + struct rtable *rt; + + if (ip_route_output_key(&rt, &fl) != 0) + return; + + if (rt->rt_src != srcip && !warned) { + printk("NAT: no longer support implicit source local NAT\n"); + printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", + NIPQUAD(srcip), NIPQUAD(dstip)); + warned = 1; + } + ip_rt_put(rt); +} + +static unsigned int ipt_dnat_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + const struct nf_nat_multi_range_compat *mr = targinfo; + + NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || + hooknum == NF_IP_LOCAL_OUT); + + ct = nf_ct_get(*pskb, &ctinfo); + + /* Connection must be valid and new. */ + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + if (hooknum == NF_IP_LOCAL_OUT && + mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) + warn_if_extra_mangle((*pskb)->nh.iph->daddr, + mr->range[0].min_ip); + + return nf_nat_setup_info(ct, &mr->range[0], hooknum); +} + +static int ipt_snat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + struct nf_nat_multi_range_compat *mr = targinfo; + + /* Must be a valid range */ + if (mr->rangesize != 1) { + printk("SNAT: multiple ranges no longer supported\n"); + return 0; + } + return 1; +} + +static int ipt_dnat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + struct nf_nat_multi_range_compat *mr = targinfo; + + /* Must be a valid range */ + if (mr->rangesize != 1) { + printk("DNAT: multiple ranges no longer supported\n"); + return 0; + } + return 1; +} + +inline unsigned int +alloc_null_binding(struct nf_conn *ct, + struct nf_nat_info *info, + unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + Use reply in case it's already been mangled (eg local packet). + */ + __be32 ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip + : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + struct nf_nat_range range + = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; + + DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", + ct, NIPQUAD(ip)); + return nf_nat_setup_info(ct, &range, hooknum); +} + +unsigned int +alloc_null_binding_confirmed(struct nf_conn *ct, + struct nf_nat_info *info, + unsigned int hooknum) +{ + __be32 ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip + : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + u_int16_t all + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all + : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); + struct nf_nat_range range + = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; + + DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", + ct, NIPQUAD(ip)); + return nf_nat_setup_info(ct, &range, hooknum); +} + +int nf_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct, + struct nf_nat_info *info) +{ + int ret; + + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); + + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + /* NUL mapping */ + ret = alloc_null_binding(ct, info, hooknum); + } + return ret; +} + +static struct ipt_target ipt_snat_reg = { + .name = "SNAT", + .target = ipt_snat_target, + .targetsize = sizeof(struct nf_nat_multi_range_compat), + .table = "nat", + .hooks = 1 << NF_IP_POST_ROUTING, + .checkentry = ipt_snat_checkentry, + .family = AF_INET, +}; + +static struct xt_target ipt_dnat_reg = { + .name = "DNAT", + .target = ipt_dnat_target, + .targetsize = sizeof(struct nf_nat_multi_range_compat), + .table = "nat", + .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), + .checkentry = ipt_dnat_checkentry, + .family = AF_INET, +}; + +int __init nf_nat_rule_init(void) +{ + int ret; + + ret = ipt_register_table(&nat_table, &nat_initial_table.repl); + if (ret != 0) + return ret; + ret = xt_register_target(&ipt_snat_reg); + if (ret != 0) + goto unregister_table; + + ret = xt_register_target(&ipt_dnat_reg); + if (ret != 0) + goto unregister_snat; + + return ret; + + unregister_snat: + xt_unregister_target(&ipt_snat_reg); + unregister_table: + ipt_unregister_table(&nat_table); + + return ret; +} + +void nf_nat_rule_cleanup(void) +{ + xt_unregister_target(&ipt_dnat_reg); + xt_unregister_target(&ipt_snat_reg); + ipt_unregister_table(&nat_table); +} diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c new file mode 100644 index 00000000000..3d524b95731 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -0,0 +1,283 @@ +/* SIP extension for UDP NAT alteration. + * + * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> + * based on RR's ip_nat_ftp.c and other modules. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/udp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_sip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); +MODULE_DESCRIPTION("SIP NAT helper"); +MODULE_ALIAS("ip_nat_sip"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct addr_map { + struct { + char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int srclen, srciplen; + unsigned int dstlen, dstiplen; + } addr[IP_CT_DIR_MAX]; +}; + +static void addr_map_init(struct nf_conn *ct, struct addr_map *map) +{ + struct nf_conntrack_tuple *t; + enum ip_conntrack_dir dir; + unsigned int n; + + for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { + t = &ct->tuplehash[dir].tuple; + + n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", + NIPQUAD(t->src.u3.ip)); + map->addr[dir].srciplen = n; + n += sprintf(map->addr[dir].src + n, ":%u", + ntohs(t->src.u.udp.port)); + map->addr[dir].srclen = n; + + n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u", + NIPQUAD(t->dst.u3.ip)); + map->addr[dir].dstiplen = n; + n += sprintf(map->addr[dir].dst + n, ":%u", + ntohs(t->dst.u.udp.port)); + map->addr[dir].dstlen = n; + } +} + +static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, + struct nf_conn *ct, const char **dptr, size_t dlen, + enum sip_header_pos pos, struct addr_map *map) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff, addrlen; + char *addr; + + if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) + return 1; + + if ((matchlen == map->addr[dir].srciplen || + matchlen == map->addr[dir].srclen) && + memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { + addr = map->addr[!dir].dst; + addrlen = map->addr[!dir].dstlen; + } else if ((matchlen == map->addr[dir].dstiplen || + matchlen == map->addr[dir].dstlen) && + memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) { + addr = map->addr[!dir].src; + addrlen = map->addr[!dir].srclen; + } else + return 1; + + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, addr, addrlen)) + return 0; + *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + return 1; + +} + +static unsigned int ip_nat_sip(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conn *ct, + const char **dptr) +{ + enum sip_header_pos pos; + struct addr_map map; + int dataoff, datalen; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + datalen = (*pskb)->len - dataoff; + if (datalen < sizeof("SIP/2.0") - 1) + return NF_DROP; + + addr_map_init(ct, &map); + + /* Basic rules: requests and responses. */ + if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { + /* 10.2: Constructing the REGISTER Request: + * + * The "userinfo" and "@" components of the SIP URI MUST NOT + * be present. + */ + if (datalen >= sizeof("REGISTER") - 1 && + strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) + pos = POS_REG_REQ_URI; + else + pos = POS_REQ_URI; + + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) + return NF_DROP; + } + + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + return NF_DROP; + return NF_ACCEPT; +} + +static unsigned int mangle_sip_packet(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conn *ct, + const char **dptr, size_t dlen, + char *buffer, int bufflen, + enum sip_header_pos pos) +{ + unsigned int matchlen, matchoff; + + if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) + return 0; + + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, buffer, bufflen)) + return 0; + + /* We need to reload this. Thanks Patrick. */ + *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + return 1; +} + +static int mangle_content_len(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conn *ct, + const char *dptr) +{ + unsigned int dataoff, matchoff, matchlen; + char buffer[sizeof("65536")]; + int bufflen; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + + /* Get actual SDP lenght */ + if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + &matchlen, POS_SDP_HEADER) > 0) { + + /* since ct_sip_get_info() give us a pointer passing 'v=' + we need to add 2 bytes in this count. */ + int c_len = (*pskb)->len - dataoff - matchoff + 2; + + /* Now, update SDP length */ + if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + &matchlen, POS_CONTENT) > 0) { + + bufflen = sprintf(buffer, "%u", c_len); + return nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, + buffer, bufflen); + } + } + return 0; +} + +static unsigned int mangle_sdp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conn *ct, + __be32 newip, u_int16_t port, + const char *dptr) +{ + char buffer[sizeof("nnn.nnn.nnn.nnn")]; + unsigned int dataoff, bufflen; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + + /* Mangle owner and contact info. */ + bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, POS_OWNER_IP4)) + return 0; + + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, POS_CONNECTION_IP4)) + return 0; + + /* Mangle media port. */ + bufflen = sprintf(buffer, "%u", port); + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, POS_MEDIA)) + return 0; + + return mangle_content_len(pskb, ctinfo, ct, dptr); +} + +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int ip_nat_sdp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp, + const char *dptr) +{ + struct nf_conn *ct = exp->master; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + __be32 newip; + u_int16_t port; + + DEBUGP("ip_nat_sdp():\n"); + + /* Connection will come from reply */ + newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + + exp->tuple.dst.u3.ip = newip; + exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; + exp->dir = !dir; + + /* When you see the packet, we need to NAT it the same as the + this one. */ + exp->expectfn = nf_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { + exp->tuple.dst.u.udp.port = htons(port); + if (nf_conntrack_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { + nf_conntrack_unexpect_related(exp); + return NF_DROP; + } + return NF_ACCEPT; +} + +static void __exit nf_nat_sip_fini(void) +{ + rcu_assign_pointer(nf_nat_sip_hook, NULL); + rcu_assign_pointer(nf_nat_sdp_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_sip_init(void) +{ + BUG_ON(rcu_dereference(nf_nat_sip_hook)); + BUG_ON(rcu_dereference(nf_nat_sdp_hook)); + rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); + return 0; +} + +module_init(nf_nat_sip_init); +module_exit(nf_nat_sip_fini); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c new file mode 100644 index 00000000000..f12528fe1bf --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -0,0 +1,1332 @@ +/* + * nf_nat_snmp_basic.c + * + * Basic SNMP Application Layer Gateway + * + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use + * conflicting private address realms. + * + * Static NAT is used to remap the networks from the view of the network + * management system at the IP layer, and this module remaps some application + * layer addresses to match. + * + * The simplest form of ALG is performed, where only tagged IP addresses + * are modified. The module does not need to be MIB aware and only scans + * messages at the ASN.1/BER level. + * + * Currently, only SNMPv1 and SNMPv2 are supported. + * + * More information on ALG and associated issues can be found in + * RFC 2962 + * + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * McLean & Jochen Friedrich, stripped down for use in the kernel. + * + * Copyright (c) 2000 RP Internet (www.rpi.net.au). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: James Morris <jmorris@intercode.com.au> + * + * Updates: + * 2000-08-06: Convert to new helper API (Harald Welte). + * + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/checksum.h> +#include <net/udp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_nat_helper.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); +MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); + +#define SNMP_PORT 161 +#define SNMP_TRAP_PORT 162 +#define NOCT1(n) (*(u8 *)n) + +static int debug; +static DEFINE_SPINLOCK(snmp_lock); + +/* + * Application layer address mapping mimics the NAT mapping, but + * only for the first octet in this case (a more flexible system + * can be implemented if needed). + */ +struct oct1_map +{ + u_int8_t from; + u_int8_t to; +}; + + +/***************************************************************************** + * + * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) + * + *****************************************************************************/ + +/* Class */ +#define ASN1_UNI 0 /* Universal */ +#define ASN1_APL 1 /* Application */ +#define ASN1_CTX 2 /* Context */ +#define ASN1_PRV 3 /* Private */ + +/* Tag */ +#define ASN1_EOC 0 /* End Of Contents */ +#define ASN1_BOL 1 /* Boolean */ +#define ASN1_INT 2 /* Integer */ +#define ASN1_BTS 3 /* Bit String */ +#define ASN1_OTS 4 /* Octet String */ +#define ASN1_NUL 5 /* Null */ +#define ASN1_OJI 6 /* Object Identifier */ +#define ASN1_OJD 7 /* Object Description */ +#define ASN1_EXT 8 /* External */ +#define ASN1_SEQ 16 /* Sequence */ +#define ASN1_SET 17 /* Set */ +#define ASN1_NUMSTR 18 /* Numerical String */ +#define ASN1_PRNSTR 19 /* Printable String */ +#define ASN1_TEXSTR 20 /* Teletext String */ +#define ASN1_VIDSTR 21 /* Video String */ +#define ASN1_IA5STR 22 /* IA5 String */ +#define ASN1_UNITIM 23 /* Universal Time */ +#define ASN1_GENTIM 24 /* General Time */ +#define ASN1_GRASTR 25 /* Graphical String */ +#define ASN1_VISSTR 26 /* Visible String */ +#define ASN1_GENSTR 27 /* General String */ + +/* Primitive / Constructed methods*/ +#define ASN1_PRI 0 /* Primitive */ +#define ASN1_CON 1 /* Constructed */ + +/* + * Error codes. + */ +#define ASN1_ERR_NOERROR 0 +#define ASN1_ERR_DEC_EMPTY 2 +#define ASN1_ERR_DEC_EOC_MISMATCH 3 +#define ASN1_ERR_DEC_LENGTH_MISMATCH 4 +#define ASN1_ERR_DEC_BADVALUE 5 + +/* + * ASN.1 context. + */ +struct asn1_ctx +{ + int error; /* Error condition */ + unsigned char *pointer; /* Octet just to be decoded */ + unsigned char *begin; /* First octet */ + unsigned char *end; /* Octet after last octet */ +}; + +/* + * Octet string (not null terminated) + */ +struct asn1_octstr +{ + unsigned char *data; + unsigned int len; +}; + +static void asn1_open(struct asn1_ctx *ctx, + unsigned char *buf, + unsigned int len) +{ + ctx->begin = buf; + ctx->end = buf + len; + ctx->pointer = buf; + ctx->error = ASN1_ERR_NOERROR; +} + +static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) +{ + if (ctx->pointer >= ctx->end) { + ctx->error = ASN1_ERR_DEC_EMPTY; + return 0; + } + *ch = *(ctx->pointer)++; + return 1; +} + +static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) +{ + unsigned char ch; + + *tag = 0; + + do + { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + *tag <<= 7; + *tag |= ch & 0x7F; + } while ((ch & 0x80) == 0x80); + return 1; +} + +static unsigned char asn1_id_decode(struct asn1_ctx *ctx, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) +{ + unsigned char ch; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *cls = (ch & 0xC0) >> 6; + *con = (ch & 0x20) >> 5; + *tag = (ch & 0x1F); + + if (*tag == 0x1F) { + if (!asn1_tag_decode(ctx, tag)) + return 0; + } + return 1; +} + +static unsigned char asn1_length_decode(struct asn1_ctx *ctx, + unsigned int *def, + unsigned int *len) +{ + unsigned char ch, cnt; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch == 0x80) + *def = 0; + else { + *def = 1; + + if (ch < 0x80) + *len = ch; + else { + cnt = (unsigned char) (ch & 0x7F); + *len = 0; + + while (cnt > 0) { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + *len <<= 8; + *len |= ch; + cnt--; + } + } + } + return 1; +} + +static unsigned char asn1_header_decode(struct asn1_ctx *ctx, + unsigned char **eoc, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) +{ + unsigned int def, len; + + if (!asn1_id_decode(ctx, cls, con, tag)) + return 0; + + def = len = 0; + if (!asn1_length_decode(ctx, &def, &len)) + return 0; + + if (def) + *eoc = ctx->pointer + len; + else + *eoc = NULL; + return 1; +} + +static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) +{ + unsigned char ch; + + if (eoc == 0) { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch != 0x00) { + ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + if (ch != 0x00) { + ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; + return 0; + } + return 1; + } else { + if (ctx->pointer != eoc) { + ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH; + return 0; + } + return 1; + } +} + +static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc) +{ + ctx->pointer = eoc; + return 1; +} + +static unsigned char asn1_long_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + long *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = (signed char) ch; + len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (long)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned int *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = ch; + if (ch == 0) len = 0; + else len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (unsigned int)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned long *integer) +{ + unsigned char ch; + unsigned int len; + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer = ch; + if (ch == 0) len = 0; + else len = 1; + + while (ctx->pointer < eoc) { + if (++len > sizeof (unsigned long)) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + return 0; + } + + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *integer <<= 8; + *integer |= ch; + } + return 1; +} + +static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned char **octets, + unsigned int *len) +{ + unsigned char *ptr; + + *len = 0; + + *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); + if (*octets == NULL) { + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + + ptr = *octets; + while (ctx->pointer < eoc) { + if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { + kfree(*octets); + *octets = NULL; + return 0; + } + (*len)++; + } + return 1; +} + +static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, + unsigned long *subid) +{ + unsigned char ch; + + *subid = 0; + + do { + if (!asn1_octet_decode(ctx, &ch)) + return 0; + + *subid <<= 7; + *subid |= ch & 0x7F; + } while ((ch & 0x80) == 0x80); + return 1; +} + +static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, + unsigned char *eoc, + unsigned long **oid, + unsigned int *len) +{ + unsigned long subid; + unsigned int size; + unsigned long *optr; + + size = eoc - ctx->pointer + 1; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); + if (*oid == NULL) { + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + + optr = *oid; + + if (!asn1_subid_decode(ctx, &subid)) { + kfree(*oid); + *oid = NULL; + return 0; + } + + if (subid < 40) { + optr [0] = 0; + optr [1] = subid; + } else if (subid < 80) { + optr [0] = 1; + optr [1] = subid - 40; + } else { + optr [0] = 2; + optr [1] = subid - 80; + } + + *len = 2; + optr += 2; + + while (ctx->pointer < eoc) { + if (++(*len) > size) { + ctx->error = ASN1_ERR_DEC_BADVALUE; + kfree(*oid); + *oid = NULL; + return 0; + } + + if (!asn1_subid_decode(ctx, optr++)) { + kfree(*oid); + *oid = NULL; + return 0; + } + } + return 1; +} + +/***************************************************************************** + * + * SNMP decoding routines (gxsnmp author Dirk Wisse) + * + *****************************************************************************/ + +/* SNMP Versions */ +#define SNMP_V1 0 +#define SNMP_V2C 1 +#define SNMP_V2 2 +#define SNMP_V3 3 + +/* Default Sizes */ +#define SNMP_SIZE_COMM 256 +#define SNMP_SIZE_OBJECTID 128 +#define SNMP_SIZE_BUFCHR 256 +#define SNMP_SIZE_BUFINT 128 +#define SNMP_SIZE_SMALLOBJECTID 16 + +/* Requests */ +#define SNMP_PDU_GET 0 +#define SNMP_PDU_NEXT 1 +#define SNMP_PDU_RESPONSE 2 +#define SNMP_PDU_SET 3 +#define SNMP_PDU_TRAP1 4 +#define SNMP_PDU_BULK 5 +#define SNMP_PDU_INFORM 6 +#define SNMP_PDU_TRAP2 7 + +/* Errors */ +#define SNMP_NOERROR 0 +#define SNMP_TOOBIG 1 +#define SNMP_NOSUCHNAME 2 +#define SNMP_BADVALUE 3 +#define SNMP_READONLY 4 +#define SNMP_GENERROR 5 +#define SNMP_NOACCESS 6 +#define SNMP_WRONGTYPE 7 +#define SNMP_WRONGLENGTH 8 +#define SNMP_WRONGENCODING 9 +#define SNMP_WRONGVALUE 10 +#define SNMP_NOCREATION 11 +#define SNMP_INCONSISTENTVALUE 12 +#define SNMP_RESOURCEUNAVAILABLE 13 +#define SNMP_COMMITFAILED 14 +#define SNMP_UNDOFAILED 15 +#define SNMP_AUTHORIZATIONERROR 16 +#define SNMP_NOTWRITABLE 17 +#define SNMP_INCONSISTENTNAME 18 + +/* General SNMP V1 Traps */ +#define SNMP_TRAP_COLDSTART 0 +#define SNMP_TRAP_WARMSTART 1 +#define SNMP_TRAP_LINKDOWN 2 +#define SNMP_TRAP_LINKUP 3 +#define SNMP_TRAP_AUTFAILURE 4 +#define SNMP_TRAP_EQPNEIGHBORLOSS 5 +#define SNMP_TRAP_ENTSPECIFIC 6 + +/* SNMPv1 Types */ +#define SNMP_NULL 0 +#define SNMP_INTEGER 1 /* l */ +#define SNMP_OCTETSTR 2 /* c */ +#define SNMP_DISPLAYSTR 2 /* c */ +#define SNMP_OBJECTID 3 /* ul */ +#define SNMP_IPADDR 4 /* uc */ +#define SNMP_COUNTER 5 /* ul */ +#define SNMP_GAUGE 6 /* ul */ +#define SNMP_TIMETICKS 7 /* ul */ +#define SNMP_OPAQUE 8 /* c */ + +/* Additional SNMPv2 Types */ +#define SNMP_UINTEGER 5 /* ul */ +#define SNMP_BITSTR 9 /* uc */ +#define SNMP_NSAP 10 /* uc */ +#define SNMP_COUNTER64 11 /* ul */ +#define SNMP_NOSUCHOBJECT 12 +#define SNMP_NOSUCHINSTANCE 13 +#define SNMP_ENDOFMIBVIEW 14 + +union snmp_syntax +{ + unsigned char uc[0]; /* 8 bit unsigned */ + char c[0]; /* 8 bit signed */ + unsigned long ul[0]; /* 32 bit unsigned */ + long l[0]; /* 32 bit signed */ +}; + +struct snmp_object +{ + unsigned long *id; + unsigned int id_len; + unsigned short type; + unsigned int syntax_len; + union snmp_syntax syntax; +}; + +struct snmp_request +{ + unsigned long id; + unsigned int error_status; + unsigned int error_index; +}; + +struct snmp_v1_trap +{ + unsigned long *id; + unsigned int id_len; + unsigned long ip_address; /* pointer */ + unsigned int general; + unsigned int specific; + unsigned long time; +}; + +/* SNMP types */ +#define SNMP_IPA 0 +#define SNMP_CNT 1 +#define SNMP_GGE 2 +#define SNMP_TIT 3 +#define SNMP_OPQ 4 +#define SNMP_C64 6 + +/* SNMP errors */ +#define SERR_NSO 0 +#define SERR_NSI 1 +#define SERR_EOM 2 + +static inline void mangle_address(unsigned char *begin, + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check); +struct snmp_cnv +{ + unsigned int class; + unsigned int tag; + int syntax; +}; + +static struct snmp_cnv snmp_conv [] = +{ + {ASN1_UNI, ASN1_NUL, SNMP_NULL}, + {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, + {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, + {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR}, + {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID}, + {ASN1_APL, SNMP_IPA, SNMP_IPADDR}, + {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */ + {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */ + {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS}, + {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE}, + + /* SNMPv2 data types and errors */ + {ASN1_UNI, ASN1_BTS, SNMP_BITSTR}, + {ASN1_APL, SNMP_C64, SNMP_COUNTER64}, + {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT}, + {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE}, + {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW}, + {0, 0, -1} +}; + +static unsigned char snmp_tag_cls2syntax(unsigned int tag, + unsigned int cls, + unsigned short *syntax) +{ + struct snmp_cnv *cnv; + + cnv = snmp_conv; + + while (cnv->syntax != -1) { + if (cnv->tag == tag && cnv->class == cls) { + *syntax = cnv->syntax; + return 1; + } + cnv++; + } + return 0; +} + +static unsigned char snmp_object_decode(struct asn1_ctx *ctx, + struct snmp_object **obj) +{ + unsigned int cls, con, tag, len, idlen; + unsigned short type; + unsigned char *eoc, *end, *p; + unsigned long *lp, *id; + unsigned long ul; + long l; + + *obj = NULL; + id = NULL; + + if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) + return 0; + + if (!asn1_oid_decode(ctx, end, &id, &idlen)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { + kfree(id); + return 0; + } + + if (con != ASN1_PRI) { + kfree(id); + return 0; + } + + type = 0; + if (!snmp_tag_cls2syntax(tag, cls, &type)) { + kfree(id); + return 0; + } + + l = 0; + switch (type) { + case SNMP_INTEGER: + len = sizeof(long); + if (!asn1_long_decode(ctx, end, &l)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, + GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.l[0] = l; + break; + case SNMP_OCTETSTR: + case SNMP_OPAQUE: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, + GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.c, p, len); + kfree(p); + break; + case SNMP_NULL: + case SNMP_NOSUCHOBJECT: + case SNMP_NOSUCHINSTANCE: + case SNMP_ENDOFMIBVIEW: + len = 0; + *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + if (!asn1_null_decode(ctx, end)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + break; + case SNMP_OBJECTID: + if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { + kfree(id); + return 0; + } + len *= sizeof(unsigned long); + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(lp); + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.ul, lp, len); + kfree(lp); + break; + case SNMP_IPADDR: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + if (len != 4) { + kfree(p); + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.uc, p, len); + kfree(p); + break; + case SNMP_COUNTER: + case SNMP_GAUGE: + case SNMP_TIMETICKS: + len = sizeof(unsigned long); + if (!asn1_ulong_decode(ctx, end, &ul)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + printk("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.ul[0] = ul; + break; + default: + kfree(id); + return 0; + } + + (*obj)->syntax_len = len; + (*obj)->type = type; + (*obj)->id = id; + (*obj)->id_len = idlen; + + if (!asn1_eoc_decode(ctx, eoc)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + return 1; +} + +static unsigned char snmp_request_decode(struct asn1_ctx *ctx, + struct snmp_request *request) +{ + unsigned int cls, con, tag; + unsigned char *end; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_ulong_decode(ctx, end, &request->id)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_uint_decode(ctx, end, &request->error_status)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + + if (!asn1_uint_decode(ctx, end, &request->error_index)) + return 0; + + return 1; +} + +/* + * Fast checksum update for possibly oddly-aligned UDP byte, from the + * code example in the draft. + */ +static void fast_csum(__sum16 *csum, + const unsigned char *optr, + const unsigned char *nptr, + int offset) +{ + unsigned char s[4]; + + if (offset & 1) { + s[0] = s[2] = 0; + s[1] = ~*optr; + s[3] = *nptr; + } else { + s[1] = s[3] = 0; + s[0] = ~*optr; + s[2] = *nptr; + } + + *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); +} + +/* + * Mangle IP address. + * - begin points to the start of the snmp messgae + * - addr points to the start of the address + */ +static inline void mangle_address(unsigned char *begin, + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check) +{ + if (map->from == NOCT1(addr)) { + u_int32_t old; + + if (debug) + memcpy(&old, (unsigned char *)addr, sizeof(old)); + + *addr = map->to; + + /* Update UDP checksum if being used */ + if (*check) { + fast_csum(check, + &map->from, &map->to, addr - begin); + + } + + if (debug) + printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to " + "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr)); + } +} + +static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, + struct snmp_v1_trap *trap, + const struct oct1_map *map, + __sum16 *check) +{ + unsigned int cls, con, tag, len; + unsigned char *end; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) + return 0; + + if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) + return 0; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_id_free; + + if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || + (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) + goto err_id_free; + + if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) + goto err_id_free; + + /* IPv4 only */ + if (len != 4) + goto err_addr_free; + + mangle_address(ctx->begin, ctx->pointer - 4, map, check); + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + goto err_addr_free; + + if (!asn1_uint_decode(ctx, end, &trap->general)) + goto err_addr_free; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + goto err_addr_free; + + if (!asn1_uint_decode(ctx, end, &trap->specific)) + goto err_addr_free; + + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) + goto err_addr_free; + + if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || + (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) + goto err_addr_free; + + if (!asn1_ulong_decode(ctx, end, &trap->time)) + goto err_addr_free; + + return 1; + +err_addr_free: + kfree((unsigned long *)trap->ip_address); + +err_id_free: + kfree(trap->id); + + return 0; +} + +/***************************************************************************** + * + * Misc. routines + * + *****************************************************************************/ + +static void hex_dump(unsigned char *buf, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (i && !(i % 16)) + printk("\n"); + printk("%02x ", *(buf + i)); + } + printk("\n"); +} + +/* + * Parse and mangle SNMP message according to mapping. + * (And this is the fucking 'basic' method). + */ +static int snmp_parse_mangle(unsigned char *msg, + u_int16_t len, + const struct oct1_map *map, + __sum16 *check) +{ + unsigned char *eoc, *end; + unsigned int cls, con, tag, vers, pdutype; + struct asn1_ctx ctx; + struct asn1_octstr comm; + struct snmp_object **obj; + + if (debug > 1) + hex_dump(msg, len); + + asn1_open(&ctx, msg, len); + + /* + * Start of SNMP message. + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + /* + * Version 1 or 2 handled. + */ + if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) + return 0; + if (!asn1_uint_decode (&ctx, end, &vers)) + return 0; + if (debug > 1) + printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1); + if (vers > 1) + return 1; + + /* + * Community. + */ + if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag)) + return 0; + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS) + return 0; + if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len)) + return 0; + if (debug > 1) { + unsigned int i; + + printk(KERN_DEBUG "bsalg: community: "); + for (i = 0; i < comm.len; i++) + printk("%c", comm.data[i]); + printk("\n"); + } + kfree(comm.data); + + /* + * PDU type + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype)) + return 0; + if (cls != ASN1_CTX || con != ASN1_CON) + return 0; + if (debug > 1) { + unsigned char *pdus[] = { + [SNMP_PDU_GET] = "get", + [SNMP_PDU_NEXT] = "get-next", + [SNMP_PDU_RESPONSE] = "response", + [SNMP_PDU_SET] = "set", + [SNMP_PDU_TRAP1] = "trapv1", + [SNMP_PDU_BULK] = "bulk", + [SNMP_PDU_INFORM] = "inform", + [SNMP_PDU_TRAP2] = "trapv2" + }; + + if (pdutype > SNMP_PDU_TRAP2) + printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype); + else + printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]); + } + if (pdutype != SNMP_PDU_RESPONSE && + pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) + return 1; + + /* + * Request header or v1 trap + */ + if (pdutype == SNMP_PDU_TRAP1) { + struct snmp_v1_trap trap; + unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); + + if (ret) { + kfree(trap.id); + kfree((unsigned long *)trap.ip_address); + } else + return ret; + + } else { + struct snmp_request req; + + if (!snmp_request_decode(&ctx, &req)) + return 0; + + if (debug > 1) + printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u " + "error_index=%u\n", req.id, req.error_status, + req.error_index); + } + + /* + * Loop through objects, look for IP addresses to mangle. + */ + if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) + return 0; + + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) + return 0; + + obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (obj == NULL) { + if (net_ratelimit()) + printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); + return 0; + } + + while (!asn1_eoc_decode(&ctx, eoc)) { + unsigned int i; + + if (!snmp_object_decode(&ctx, obj)) { + if (*obj) { + kfree((*obj)->id); + kfree(*obj); + } + kfree(obj); + return 0; + } + + if (debug > 1) { + printk(KERN_DEBUG "bsalg: object: "); + for (i = 0; i < (*obj)->id_len; i++) { + if (i > 0) + printk("."); + printk("%lu", (*obj)->id[i]); + } + printk(": type=%u\n", (*obj)->type); + + } + + if ((*obj)->type == SNMP_IPADDR) + mangle_address(ctx.begin, ctx.pointer - 4 , map, check); + + kfree((*obj)->id); + kfree(*obj); + } + kfree(obj); + + if (!asn1_eoc_decode(&ctx, eoc)) + return 0; + + return 1; +} + +/***************************************************************************** + * + * NAT routines. + * + *****************************************************************************/ + +/* + * SNMP translation routine. + */ +static int snmp_translate(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + u_int16_t udplen = ntohs(udph->len); + u_int16_t paylen = udplen - sizeof(struct udphdr); + int dir = CTINFO2DIR(ctinfo); + struct oct1_map map; + + /* + * Determine mappping for application layer addresses based + * on NAT manipulations for the packet. + */ + if (dir == IP_CT_DIR_ORIGINAL) { + /* SNAT traps */ + map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); + } else { + /* DNAT replies */ + map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); + } + + if (map.from == map.to) + return NF_ACCEPT; + + if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), + paylen, &map, &udph->check)) { + if (net_ratelimit()) + printk(KERN_WARNING "bsalg: parser failed\n"); + return NF_DROP; + } + return NF_ACCEPT; +} + +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted */ +static int help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir = CTINFO2DIR(ctinfo); + unsigned int ret; + struct iphdr *iph = (*pskb)->nh.iph; + struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) + return NF_ACCEPT; + + /* + * Make sure the packet length is ok. So far, we were only guaranteed + * to have a valid length IP header plus 8 bytes, which means we have + * enough room for a UDP header. Just verify the UDP length field so we + * can mess around with the payload. + */ + if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { + if (net_ratelimit()) + printk(KERN_WARNING "SNMP: dropping malformed packet " + "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", + NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + return NF_DROP; + } + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, ctinfo, pskb); + spin_unlock_bh(&snmp_lock); + return ret; +} + +static struct nf_conntrack_helper snmp_helper __read_mostly = { + .max_expected = 0, + .timeout = 180, + .me = THIS_MODULE, + .help = help, + .name = "snmp", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, +}; + +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .max_expected = 0, + .timeout = 180, + .me = THIS_MODULE, + .help = help, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, +}; + +/***************************************************************************** + * + * Module stuff. + * + *****************************************************************************/ + +static int __init nf_nat_snmp_basic_init(void) +{ + int ret = 0; + + ret = nf_conntrack_helper_register(&snmp_helper); + if (ret < 0) + return ret; + ret = nf_conntrack_helper_register(&snmp_trap_helper); + if (ret < 0) { + nf_conntrack_helper_unregister(&snmp_helper); + return ret; + } + return ret; +} + +static void __exit nf_nat_snmp_basic_fini(void) +{ + nf_conntrack_helper_unregister(&snmp_helper); + nf_conntrack_helper_unregister(&snmp_trap_helper); +} + +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); + +module_param(debug, int, 0600); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c new file mode 100644 index 00000000000..730a7a44c88 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -0,0 +1,406 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <net/ip.h> +#include <net/checksum.h> +#include <linux/spinlock.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> +#include <linux/netfilter_ipv4/ip_tables.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ + : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ + : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \ + : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ + : "*ERROR*"))) + +#ifdef CONFIG_XFRM +static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct nf_conn *ct; + struct nf_conntrack_tuple *t; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + dir = CTINFO2DIR(ctinfo); + t = &ct->tuplehash[dir].tuple; + + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + if (ct->status & statusbit) { + fl->fl4_dst = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_dport = t->dst.u.tcp.port; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl->fl4_src = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_sport = t->src.u.tcp.port; + } +} +#endif + +static unsigned int +nf_nat_fn(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + struct nf_nat_info *info; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + and local-out, and nf_nat_out protects post-routing. */ + NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off + & htons(IP_MF|IP_OFFSET))); + + ct = nf_ct_get(*pskb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + have dropped it. Hence it's the user's responsibilty to + packet filter it out, or implement conntrack/NAT for that + protocol. 8) --RR */ + if (!ct) { + /* Exception: ICMP redirect to new connection (not in + hash table yet). We must not let this through, in + case we're doing NAT to the same network. */ + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { + struct icmphdr _hdr, *hp; + + hp = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4, + sizeof(_hdr), &_hdr); + if (hp != NULL && + hp->type == ICMP_REDIRECT) + return NF_DROP; + } + return NF_ACCEPT; + } + + /* Don't try to NAT if this packet is not conntracked */ + if (ct == &nf_conntrack_untracked) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) + return NF_DROP; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED+IP_CT_IS_REPLY: + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(ct, ctinfo, + hooknum, pskb)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + info = &nat->info; + + /* Seen it before? This can happen for loopback, retrans, + or local packets.. */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + if (unlikely(nf_ct_is_confirmed(ct))) + /* NAT module was loaded late */ + ret = alloc_null_binding_confirmed(ct, info, + hooknum); + else if (hooknum == NF_IP_LOCAL_IN) + /* LOCAL_IN hook doesn't have a chain! */ + ret = alloc_null_binding(ct, info, hooknum); + else + ret = nf_nat_rule_find(pskb, hooknum, in, out, + ct, info); + + if (ret != NF_ACCEPT) { + return ret; + } + } else + DEBUGP("Already setup manip %s for ct %p\n", + maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + info = &nat->info; + } + + NF_CT_ASSERT(info); + return nf_nat_packet(ct, ctinfo, hooknum, pskb); +} + +static unsigned int +nf_nat_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + __be32 daddr = (*pskb)->nh.iph->daddr; + + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != (*pskb)->nh.iph->daddr) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; + } + return ret; +} + +static unsigned int +nf_nat_out(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) || + (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip + || ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all + ) + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_local_fn(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) || + (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all +#endif + ) + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static unsigned int +nf_nat_adjust(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(*pskb, &ctinfo); + if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + DEBUGP("nf_nat_standalone: adjusting sequence number\n"); + if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) + return NF_DROP; + } + return NF_ACCEPT; +} + +/* We must be after connection tracking and before packet filtering. */ + +static struct nf_hook_ops nf_nat_ops[] = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_in, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* After conntrack, adjust sequence number */ + { + .hook = nf_nat_adjust, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SEQ_ADJUST, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_local_fn, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_fn, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* After conntrack, adjust sequence number */ + { + .hook = nf_nat_adjust, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SEQ_ADJUST, + }, +}; + +static int __init nf_nat_standalone_init(void) +{ + int size, ret = 0; + + need_conntrack(); + + size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) + + sizeof(struct nf_conn_nat); + ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size); + if (ret < 0) { + printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); + return ret; + } + + size = ALIGN(size, __alignof__(struct nf_conn_help)) + + sizeof(struct nf_conn_help); + ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP, + "nf_nat:help", size); + if (ret < 0) { + printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); + goto cleanup_register_cache; + } +#ifdef CONFIG_XFRM + BUG_ON(ip_nat_decode_session != NULL); + ip_nat_decode_session = nat_decode_session; +#endif + ret = nf_nat_rule_init(); + if (ret < 0) { + printk("nf_nat_init: can't setup rules.\n"); + goto cleanup_decode_session; + } + ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); + if (ret < 0) { + printk("nf_nat_init: can't register hooks.\n"); + goto cleanup_rule_init; + } + nf_nat_module_is_loaded = 1; + return ret; + + cleanup_rule_init: + nf_nat_rule_cleanup(); + cleanup_decode_session: +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif + nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP); + cleanup_register_cache: + nf_conntrack_unregister_cache(NF_CT_F_NAT); + return ret; +} + +static void __exit nf_nat_standalone_fini(void) +{ + nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); + nf_nat_rule_cleanup(); + nf_nat_module_is_loaded = 0; +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif + /* Conntrack caches are unregistered in nf_conntrack_cleanup */ +} + +module_init(nf_nat_standalone_init); +module_exit(nf_nat_standalone_fini); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat"); diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c new file mode 100644 index 00000000000..2566b79de22 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -0,0 +1,52 @@ +/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/udp.h> + +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_tftp.h> + +MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); +MODULE_DESCRIPTION("TFTP NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_tftp"); + +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp) +{ + struct nf_conn *ct = exp->master; + + exp->saved_proto.udp.port + = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + exp->dir = IP_CT_DIR_REPLY; + exp->expectfn = nf_nat_follow_master; + if (nf_conntrack_expect_related(exp) != 0) + return NF_DROP; + return NF_ACCEPT; +} + +static void __exit nf_nat_tftp_fini(void) +{ + rcu_assign_pointer(nf_nat_tftp_hook, NULL); + synchronize_rcu(); +} + +static int __init nf_nat_tftp_init(void) +{ + BUG_ON(rcu_dereference(nf_nat_tftp_hook)); + rcu_assign_pointer(nf_nat_tftp_hook, help); + return 0; +} + +module_init(nf_nat_tftp_init); +module_exit(nf_nat_tftp_fini); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9c6cbe3d9fb..cd873da54cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include <net/protocol.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/udplite.h> #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -66,6 +67,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, atomic_read(&ip_frag_mem)); @@ -304,6 +306,17 @@ static int snmp_seq_show(struct seq_file *seq, void *v) fold_field((void **) udp_statistics, snmp4_udp_list[i].entry)); + /* the UDP and UDP-Lite MIBs are the same */ + seq_puts(seq, "\nUdpLite:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + seq_printf(seq, " %s", snmp4_udp_list[i].name); + + seq_puts(seq, "\nUdpLite:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + seq_printf(seq, " %lu", + fold_field((void **) udplite_statistics, + snmp4_udp_list[i].entry) ); + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b430cf2a4f6..a6c63bbd9dd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -329,7 +329,7 @@ error: return err; } -static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -338,7 +338,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) unsigned int i; if (!msg->msg_iov) - return; + return 0; for (i = 0; i < msg->msg_iovlen; i++) { iov = &msg->msg_iov[i]; @@ -360,8 +360,9 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - get_user(fl->fl_icmp_type, type); - get_user(fl->fl_icmp_code, code); + if (get_user(fl->fl_icmp_type, type) || + get_user(fl->fl_icmp_code, code)) + return -EFAULT; probed = 1; } break; @@ -372,6 +373,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (probed) break; } + return 0; } static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, @@ -480,8 +482,11 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; - if (!inet->hdrincl) - raw_probe_proto_opt(&fl, msg); + if (!inet->hdrincl) { + err = raw_probe_proto_opt(&fl, msg); + if (err) + goto done; + } security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); @@ -849,8 +854,8 @@ static void raw_seq_stop(struct seq_file *seq, void *v) static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) { struct inet_sock *inet = inet_sk(sp); - unsigned int dest = inet->daddr, - src = inet->rcv_saddr; + __be32 dest = inet->daddr, + src = inet->rcv_saddr; __u16 destp = 0, srcp = inet->num; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 925ee4dfc32..1aaff0a2e09 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -566,11 +566,9 @@ static inline u32 rt_score(struct rtable *rt) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | -#ifdef CONFIG_IP_ROUTE_FWMARK - (fl1->nl_u.ip4_u.fwmark ^ fl2->nl_u.ip4_u.fwmark) | -#endif + return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + (fl1->mark ^ fl2->mark) | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | @@ -1643,9 +1641,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE @@ -1784,14 +1780,12 @@ static inline int __mkroute_input(struct sk_buff *skb, #endif if (in_dev->cnf.no_policy) rth->u.dst.flags |= DST_NOPOLICY; - if (in_dev->cnf.no_xfrm) + if (out_dev->cnf.no_xfrm) rth->u.dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; @@ -1920,10 +1914,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, .saddr = saddr, .tos = tos, .scope = RT_SCOPE_UNIVERSE, -#ifdef CONFIG_IP_ROUTE_FWMARK - .fwmark = skb->nfmark -#endif } }, + .mark = skb->mark, .iif = dev->ifindex }; unsigned flags = 0; u32 itag = 0; @@ -2034,9 +2026,7 @@ local_input: rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= skb->nfmark; -#endif + rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE @@ -2113,9 +2103,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src == saddr && rth->fl.iif == iif && rth->fl.oif == 0 && -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark == skb->nfmark && -#endif + rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); @@ -2239,9 +2227,7 @@ static inline int __mkroute_output(struct rtable **result, rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; rth->fl.oif = oldflp->oif; -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark= oldflp->fl4_fwmark; -#endif + rth->fl.mark = oldflp->mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; rth->rt_iif = oldflp->oif ? : dev_out->ifindex; @@ -2385,10 +2371,8 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) .scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), -#ifdef CONFIG_IP_ROUTE_FWMARK - .fwmark = oldflp->fl4_fwmark -#endif } }, + .mark = oldflp->mark, .iif = loopback_dev.ifindex, .oif = oldflp->oif }; struct fib_result res; @@ -2583,9 +2567,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && rth->fl.oif == flp->oif && -#ifdef CONFIG_IP_ROUTE_FWMARK - rth->fl.fl4_fwmark == flp->fl4_fwmark && -#endif + rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { @@ -2647,7 +2629,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; - struct rta_cacheinfo ci; + long expires; + u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) @@ -2694,20 +2677,13 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) goto nla_put_failure; - ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); - ci.rta_used = rt->u.dst.__use; - ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); - if (rt->u.dst.expires) - ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies); - else - ci.rta_expires = 0; - ci.rta_error = rt->u.dst.error; - ci.rta_id = ci.rta_ts = ci.rta_tsage = 0; + error = rt->u.dst.error; + expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; if (rt->peer) { - ci.rta_id = rt->peer->ip_id_count; + id = rt->peer->ip_id_count; if (rt->peer->tcp_ts_stamp) { - ci.rta_ts = rt->peer->tcp_ts; - ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; + ts = rt->peer->tcp_ts; + tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } @@ -2726,7 +2702,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, } else { if (err == -EMSGSIZE) goto nla_put_failure; - ci.rta_error = err; + error = err; } } } else @@ -2734,7 +2710,9 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + expires, error) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2894,8 +2872,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, - size_t newlen, - void **context) + size_t newlen) { int delay; if (newlen != sizeof(int)) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 661e0a4bca7..6b19530905a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -35,23 +35,23 @@ module_init(init_syncookies); #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -static u32 cookie_hash(u32 saddr, u32 daddr, u32 sport, u32 dport, +static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) { __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = (sport << 16) + dport; + tmp[0] = (__force u32)saddr; + tmp[1] = (__force u32)daddr; + tmp[2] = ((__force u32)sport << 16) + (__force u32)dport; tmp[3] = count; sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5); return tmp[17]; } -static __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport, - __u16 dport, __u32 sseq, __u32 count, +static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, + __be16 dport, __u32 sseq, __u32 count, __u32 data) { /* @@ -80,8 +80,8 @@ static __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport, * "maxdiff" if the current (passed-in) "count". The return value * is (__u32)-1 if this test fails. */ -static __u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr, - __u16 sport, __u16 dport, __u32 sseq, +static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, __u32 sseq, __u32 count, __u32 maxdiff) { __u32 diff; @@ -220,7 +220,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->rcv_isn = htonl(skb->h.th->seq) - 1; + treq->rcv_isn = ntohl(skb->h.th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; ireq->rmt_port = skb->h.th->source; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e82a5be894b..fabf69a9108 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -51,8 +51,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, static int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -111,8 +110,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { @@ -122,19 +120,71 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, - context); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; } -static int __init tcp_congestion_default(void) +static int proc_tcp_available_congestion_control(ctl_table *ctl, + int write, struct file * filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); + ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); + ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + kfree(tbl.data); + return ret; } -late_initcall(tcp_congestion_default); +static int proc_allowed_congestion_control(ctl_table *ctl, + int write, struct file * filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + + tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); + ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + if (write && ret == 0) + ret = tcp_set_allowed_congestion_control(tbl.data); + kfree(tbl.data); + return ret; +} + +static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + + tcp_get_available_congestion_control(tbl.data, tbl.maxlen); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + if (ret == 0 && newval && newlen) + ret = tcp_set_allowed_congestion_control(tbl.data); + kfree(tbl.data); + + return ret; + +} ctl_table ipv4_table[] = { { @@ -738,6 +788,21 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_dointvec, }, #endif /* CONFIG_NETLABEL */ + { + .ctl_name = NET_TCP_AVAIL_CONG_CONTROL, + .procname = "tcp_available_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0444, + .proc_handler = &proc_tcp_available_congestion_control, + }, + { + .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, + .procname = "tcp_allowed_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0644, + .proc_handler = &proc_allowed_congestion_control, + .strategy = &strategy_allowed_congestion_control, + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 66e9a729f6d..b67e0dd743b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -258,6 +258,7 @@ #include <linux/bootmem.h> #include <linux/cache.h> #include <linux/err.h> +#include <linux/crypto.h> #include <net/icmp.h> #include <net/tcp.h> @@ -462,11 +463,12 @@ static inline int forced_push(struct tcp_sock *tp) static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { - skb->csum = 0; - TCP_SKB_CB(skb)->seq = tp->write_seq; - TCP_SKB_CB(skb)->end_seq = tp->write_seq; - TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = 0; + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + skb->csum = 0; + tcb->seq = tcb->end_seq = tp->write_seq; + tcb->flags = TCPCB_FLAG_ACK; + tcb->sacked = 0; skb_header_release(skb); __skb_queue_tail(&sk->sk_write_queue, skb); sk_charge_skb(sk, skb); @@ -1942,6 +1944,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; +#ifdef CONFIG_TCP_MD5SIG + case TCP_MD5SIG: + /* Read the IP->Key mappings from userspace */ + err = tp->af_specific->md5_parse(sk, optval, optlen); + break; +#endif + default: err = -ENOPROTOOPT; break; @@ -2154,7 +2163,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) struct tcphdr *th; unsigned thlen; unsigned int seq; - unsigned int delta; + __be32 delta; unsigned int oldlen; unsigned int len; @@ -2207,7 +2216,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) do { th->fin = th->psh = 0; - th->check = ~csum_fold(th->check + delta); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2221,7 +2231,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) } while (skb->next); delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); - th->check = ~csum_fold(th->check + delta); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2231,6 +2242,136 @@ out: } EXPORT_SYMBOL(tcp_tso_segment); +#ifdef CONFIG_TCP_MD5SIG +static unsigned long tcp_md5sig_users; +static struct tcp_md5sig_pool **tcp_md5sig_pool; +static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); + +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) +{ + int cpu; + for_each_possible_cpu(cpu) { + struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); + if (p) { + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); + kfree(p); + p = NULL; + } + } + free_percpu(pool); +} + +void tcp_free_md5sig_pool(void) +{ + struct tcp_md5sig_pool **pool = NULL; + + spin_lock(&tcp_md5sig_pool_lock); + if (--tcp_md5sig_users == 0) { + pool = tcp_md5sig_pool; + tcp_md5sig_pool = NULL; + } + spin_unlock(&tcp_md5sig_pool_lock); + if (pool) + __tcp_free_md5sig_pool(pool); +} + +EXPORT_SYMBOL(tcp_free_md5sig_pool); + +static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) +{ + int cpu; + struct tcp_md5sig_pool **pool; + + pool = alloc_percpu(struct tcp_md5sig_pool *); + if (!pool) + return NULL; + + for_each_possible_cpu(cpu) { + struct tcp_md5sig_pool *p; + struct crypto_hash *hash; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + goto out_free; + *per_cpu_ptr(pool, cpu) = p; + + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (!hash || IS_ERR(hash)) + goto out_free; + + p->md5_desc.tfm = hash; + } + return pool; +out_free: + __tcp_free_md5sig_pool(pool); + return NULL; +} + +struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) +{ + struct tcp_md5sig_pool **pool; + int alloc = 0; + +retry: + spin_lock(&tcp_md5sig_pool_lock); + pool = tcp_md5sig_pool; + if (tcp_md5sig_users++ == 0) { + alloc = 1; + spin_unlock(&tcp_md5sig_pool_lock); + } else if (!pool) { + tcp_md5sig_users--; + spin_unlock(&tcp_md5sig_pool_lock); + cpu_relax(); + goto retry; + } else + spin_unlock(&tcp_md5sig_pool_lock); + + if (alloc) { + /* we cannot hold spinlock here because this may sleep. */ + struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); + spin_lock(&tcp_md5sig_pool_lock); + if (!p) { + tcp_md5sig_users--; + spin_unlock(&tcp_md5sig_pool_lock); + return NULL; + } + pool = tcp_md5sig_pool; + if (pool) { + /* oops, it has already been assigned. */ + spin_unlock(&tcp_md5sig_pool_lock); + __tcp_free_md5sig_pool(p); + } else { + tcp_md5sig_pool = pool = p; + spin_unlock(&tcp_md5sig_pool_lock); + } + } + return pool; +} + +EXPORT_SYMBOL(tcp_alloc_md5sig_pool); + +struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) +{ + struct tcp_md5sig_pool **p; + spin_lock(&tcp_md5sig_pool_lock); + p = tcp_md5sig_pool; + if (p) + tcp_md5sig_users++; + spin_unlock(&tcp_md5sig_pool_lock); + return (p ? *per_cpu_ptr(p, cpu) : NULL); +} + +EXPORT_SYMBOL(__tcp_get_md5sig_pool); + +void __tcp_put_md5sig_pool(void) +{ + tcp_free_md5sig_pool(); +} + +EXPORT_SYMBOL(__tcp_put_md5sig_pool); +#endif + extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; @@ -2270,7 +2411,7 @@ void __init tcp_init(void) thash_entries, (num_physpages >= 128 * 1024) ? 13 : 15, - HASH_HIGHMEM, + 0, &tcp_hashinfo.ehash_size, NULL, 0); @@ -2286,7 +2427,7 @@ void __init tcp_init(void) tcp_hashinfo.ehash_size, (num_physpages >= 128 * 1024) ? 13 : 15, - HASH_HIGHMEM, + 0, &tcp_hashinfo.bhash_size, NULL, 64 * 1024); @@ -2316,9 +2457,10 @@ void __init tcp_init(void) sysctl_max_syn_backlog = 128; } - sysctl_tcp_mem[0] = 768 << order; - sysctl_tcp_mem[1] = 1024 << order; - sysctl_tcp_mem[2] = 1536 << order; + /* Allow no more than 3/4 kernel memory (usually less) allocated to TCP */ + sysctl_tcp_mem[0] = (1536 / sizeof (struct inet_bind_hashbucket)) << order; + sysctl_tcp_mem[1] = sysctl_tcp_mem[0] * 4 / 3; + sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index af0aca1e6be..5ca7723d079 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -113,7 +113,7 @@ int tcp_set_default_congestion_control(const char *name) spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); #ifdef CONFIG_KMOD - if (!ca) { + if (!ca && capable(CAP_SYS_MODULE)) { spin_unlock(&tcp_cong_list_lock); request_module("tcp_%s", name); @@ -123,6 +123,7 @@ int tcp_set_default_congestion_control(const char *name) #endif if (ca) { + ca->non_restricted = 1; /* default is always allowed */ list_move(&ca->list, &tcp_cong_list); ret = 0; } @@ -131,6 +132,30 @@ int tcp_set_default_congestion_control(const char *name) return ret; } +/* Set default value from kernel configuration at bootup */ +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} +late_initcall(tcp_congestion_default); + + +/* Build string with list of available congestion control values */ +void tcp_get_available_congestion_control(char *buf, size_t maxlen) +{ + struct tcp_congestion_ops *ca; + size_t offs = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(ca, &tcp_cong_list, list) { + offs += snprintf(buf + offs, maxlen - offs, + "%s%s", + offs == 0 ? "" : " ", ca->name); + + } + rcu_read_unlock(); +} + /* Get current default congestion control */ void tcp_get_default_congestion_control(char *name) { @@ -144,6 +169,64 @@ void tcp_get_default_congestion_control(char *name) rcu_read_unlock(); } +/* Built list of non-restricted congestion control values */ +void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) +{ + struct tcp_congestion_ops *ca; + size_t offs = 0; + + *buf = '\0'; + rcu_read_lock(); + list_for_each_entry_rcu(ca, &tcp_cong_list, list) { + if (!ca->non_restricted) + continue; + offs += snprintf(buf + offs, maxlen - offs, + "%s%s", + offs == 0 ? "" : " ", ca->name); + + } + rcu_read_unlock(); +} + +/* Change list of non-restricted congestion control */ +int tcp_set_allowed_congestion_control(char *val) +{ + struct tcp_congestion_ops *ca; + char *clone, *name; + int ret = 0; + + clone = kstrdup(val, GFP_USER); + if (!clone) + return -ENOMEM; + + spin_lock(&tcp_cong_list_lock); + /* pass 1 check for bad entries */ + while ((name = strsep(&clone, " ")) && *name) { + ca = tcp_ca_find(name); + if (!ca) { + ret = -ENOENT; + goto out; + } + } + + /* pass 2 clear */ + list_for_each_entry_rcu(ca, &tcp_cong_list, list) + ca->non_restricted = 0; + + /* pass 3 mark as allowed */ + while ((name = strsep(&val, " ")) && *name) { + ca = tcp_ca_find(name); + WARN_ON(!ca); + if (ca) + ca->non_restricted = 1; + } +out: + spin_unlock(&tcp_cong_list_lock); + + return ret; +} + + /* Change congestion control for socket */ int tcp_set_congestion_control(struct sock *sk, const char *name) { @@ -153,12 +236,25 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = tcp_ca_find(name); + /* no change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; +#ifdef CONFIG_KMOD + /* not found attempt to autoload module */ + if (!ca && capable(CAP_SYS_MODULE)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif if (!ca) err = -ENOENT; + else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) + err = -EPERM; + else if (!try_module_get(ca->owner)) err = -EBUSY; @@ -260,6 +356,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); struct tcp_congestion_ops tcp_reno = { .name = "reno", + .non_restricted = 1, .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 283be3cb466..753987a1048 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -26,12 +26,12 @@ struct htcp { u32 alpha; /* Fixed point arith, << 7 */ u8 beta; /* Fixed point arith, << 7 */ u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */ - u32 last_cong; /* Time since last congestion event end */ - u32 undo_last_cong; u16 pkts_acked; u32 packetcount; u32 minRTT; u32 maxRTT; + u32 last_cong; /* Time since last congestion event end */ + u32 undo_last_cong; u32 undo_maxRTT; u32 undo_old_maxB; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cf06accbe68..c701f6abbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2677,6 +2677,14 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, opt_rx->sack_ok) { TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; } +#ifdef CONFIG_TCP_MD5SIG + case TCPOPT_MD5SIG: + /* + * The MD5 Hash has already been + * checked (see tcp_v{4,6}_do_rcv()). + */ + break; +#endif }; ptr+=opsize-2; length-=opsize; @@ -3782,9 +3790,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) return err; } -static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { - int result; + __sum16 result; if (sock_owned_by_user(sk)) { local_bh_enable(); @@ -4227,9 +4235,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * is initialized. */ tp->copied_seq = tp->rcv_nxt; - mb(); + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); + security_inet_conn_established(sk, skb); + /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); @@ -4473,7 +4483,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case TCP_SYN_RECV: if (acceptable) { tp->copied_seq = tp->rcv_nxt; - mb(); + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 22ef8bd2662..a1222d6968c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -78,6 +78,9 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> + int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; @@ -89,10 +92,19 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, + __be32 addr); +static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + __be32 saddr, __be32 daddr, + struct tcphdr *th, int protocol, + int tcplen); +#endif + struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), + .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), }; static int tcp_v4_get_port(struct sock *sk, unsigned short snum) @@ -111,7 +123,7 @@ void tcp_unhash(struct sock *sk) inet_unhash(&tcp_hashinfo, sk); } -static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) +static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { return secure_tcp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr, @@ -205,13 +217,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { struct inet_peer *peer = rt_get_peer(rt); - - /* VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state TIME-WAIT - * and initialize rx_opt.ts_recent from it, when trying new connection. + /* + * VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state + * TIME-WAIT * and initialize rx_opt.ts_recent from it, + * when trying new connection. */ - - if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { + if (peer != NULL && + peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } @@ -236,7 +249,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; - err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk); + err = ip_route_newports(&rt, IPPROTO_TCP, + inet->sport, inet->dport, sk); if (err) goto failure; @@ -260,7 +274,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return 0; failure: - /* This unhashes the socket and releases the local port, if necessary. */ + /* + * This unhashes the socket and releases the local port, + * if necessary. + */ tcp_set_state(sk, TCP_CLOSE); ip_rt_put(rt); sk->sk_route_caps = 0; @@ -485,8 +502,9 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); - skb->csum = offsetof(struct tcphdr, check); + th->check = ~tcp_v4_check(th, len, + inet->saddr, inet->daddr, 0); + skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, csum_partial((char *)th, @@ -508,7 +526,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -526,11 +544,19 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(struct sk_buff *skb) +static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th; - struct tcphdr rth; + struct { + struct tcphdr th; +#ifdef CONFIG_TCP_MD5SIG + __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; +#endif + } rep; struct ip_reply_arg arg; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; +#endif /* Never send a reset in response to a reset. */ if (th->rst) @@ -540,29 +566,49 @@ static void tcp_v4_send_reset(struct sk_buff *skb) return; /* Swap the send and the receive. */ - memset(&rth, 0, sizeof(struct tcphdr)); - rth.dest = th->source; - rth.source = th->dest; - rth.doff = sizeof(struct tcphdr) / 4; - rth.rst = 1; + memset(&rep, 0, sizeof(rep)); + rep.th.dest = th->source; + rep.th.source = th->dest; + rep.th.doff = sizeof(struct tcphdr) / 4; + rep.th.rst = 1; if (th->ack) { - rth.seq = th->ack_seq; + rep.th.seq = th->ack_seq; } else { - rth.ack = 1; - rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + - skb->len - (th->doff << 2)); + rep.th.ack = 1; + rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + + skb->len - (th->doff << 2)); } - memset(&arg, 0, sizeof arg); - arg.iov[0].iov_base = (unsigned char *)&rth; - arg.iov[0].iov_len = sizeof rth; + memset(&arg, 0, sizeof(arg)); + arg.iov[0].iov_base = (unsigned char *)&rep; + arg.iov[0].iov_len = sizeof(rep.th); + +#ifdef CONFIG_TCP_MD5SIG + key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL; + if (key) { + rep.opt[0] = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + /* Update length and the length the header thinks exists */ + arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; + rep.th.doff = arg.iov[0].iov_len / 4; + + tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], + key, + skb->nh.iph->daddr, + skb->nh.iph->saddr, + &rep.th, IPPROTO_TCP, + arg.iov[0].iov_len); + } +#endif arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, - skb->nh.iph->saddr, /*XXX*/ + skb->nh.iph->saddr, /* XXX */ sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth); + ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); @@ -572,28 +618,37 @@ static void tcp_v4_send_reset(struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) { struct tcphdr *th = skb->h.th; struct { struct tcphdr th; - u32 tsopt[TCPOLEN_TSTAMP_ALIGNED >> 2]; + __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) +#ifdef CONFIG_TCP_MD5SIG + + (TCPOLEN_MD5SIG_ALIGNED >> 2) +#endif + ]; } rep; struct ip_reply_arg arg; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; + struct tcp_md5sig_key tw_key; +#endif memset(&rep.th, 0, sizeof(struct tcphdr)); - memset(&arg, 0, sizeof arg); + memset(&arg, 0, sizeof(arg)); arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); if (ts) { - rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - rep.tsopt[1] = htonl(tcp_time_stamp); - rep.tsopt[2] = htonl(ts); - arg.iov[0].iov_len = sizeof(rep); + rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + rep.opt[1] = htonl(tcp_time_stamp); + rep.opt[2] = htonl(ts); + arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED; } /* Swap the send and the receive. */ @@ -605,8 +660,44 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, rep.th.ack = 1; rep.th.window = htons(win); +#ifdef CONFIG_TCP_MD5SIG + /* + * The SKB holds an imcoming packet, but may not have a valid ->sk + * pointer. This is especially the case when we're dealing with a + * TIME_WAIT ack, because the sk structure is long gone, and only + * the tcp_timewait_sock remains. So the md5 key is stashed in that + * structure, and we use it in preference. I believe that (twsk || + * skb->sk) holds true, but we program defensively. + */ + if (!twsk && skb->sk) { + key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr); + } else if (twsk && twsk->tw_md5_keylen) { + tw_key.key = twsk->tw_md5_key; + tw_key.keylen = twsk->tw_md5_keylen; + key = &tw_key; + } else + key = NULL; + + if (key) { + int offset = (ts) ? 3 : 0; + + rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; + rep.th.doff = arg.iov[0].iov_len/4; + + tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], + key, + skb->nh.iph->daddr, + skb->nh.iph->saddr, + &rep.th, IPPROTO_TCP, + arg.iov[0].iov_len); + } +#endif arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, - skb->nh.iph->saddr, /*XXX*/ + skb->nh.iph->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; @@ -618,17 +709,20 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, - tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); + tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcptw->tw_ts_recent); inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, + struct request_sock *req) { - tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, + tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, + tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent); } @@ -662,8 +756,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); - if (err == NET_XMIT_CN) - err = 0; + err = net_xmit_eval(err); } out: @@ -715,7 +808,423 @@ static struct ip_options *tcp_v4_save_options(struct sock *sk, return dopt; } -struct request_sock_ops tcp_request_sock_ops = { +#ifdef CONFIG_TCP_MD5SIG +/* + * RFC2385 MD5 checksumming requires a mapping of + * IP address->MD5 Key. + * We need to maintain these in the sk structure. + */ + +/* Find the Key structure for an address. */ +static struct tcp_md5sig_key * + tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +{ + struct tcp_sock *tp = tcp_sk(sk); + int i; + + if (!tp->md5sig_info || !tp->md5sig_info->entries4) + return NULL; + for (i = 0; i < tp->md5sig_info->entries4; i++) { + if (tp->md5sig_info->keys4[i].addr == addr) + return (struct tcp_md5sig_key *) + &tp->md5sig_info->keys4[i]; + } + return NULL; +} + +struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, + struct sock *addr_sk) +{ + return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); +} + +EXPORT_SYMBOL(tcp_v4_md5_lookup); + +static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, + struct request_sock *req) +{ + return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); +} + +/* This can be called on a newly created socket, from other files */ +int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, + u8 *newkey, u8 newkeylen) +{ + /* Add Key to the list */ + struct tcp4_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + struct tcp4_md5sig_key *keys; + + key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); + if (key) { + /* Pre-existing entry - just update that one. */ + kfree(key->key); + key->key = newkey; + key->keylen = newkeylen; + } else { + struct tcp_md5sig_info *md5sig; + + if (!tp->md5sig_info) { + tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), + GFP_ATOMIC); + if (!tp->md5sig_info) { + kfree(newkey); + return -ENOMEM; + } + } + if (tcp_alloc_md5sig_pool() == NULL) { + kfree(newkey); + return -ENOMEM; + } + md5sig = tp->md5sig_info; + + if (md5sig->alloced4 == md5sig->entries4) { + keys = kmalloc((sizeof(*keys) * + (md5sig->entries4 + 1)), GFP_ATOMIC); + if (!keys) { + kfree(newkey); + tcp_free_md5sig_pool(); + return -ENOMEM; + } + + if (md5sig->entries4) + memcpy(keys, md5sig->keys4, + sizeof(*keys) * md5sig->entries4); + + /* Free old key list, and reference new one */ + if (md5sig->keys4) + kfree(md5sig->keys4); + md5sig->keys4 = keys; + md5sig->alloced4++; + } + md5sig->entries4++; + md5sig->keys4[md5sig->entries4 - 1].addr = addr; + md5sig->keys4[md5sig->entries4 - 1].key = newkey; + md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen; + } + return 0; +} + +EXPORT_SYMBOL(tcp_v4_md5_do_add); + +static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, + u8 *newkey, u8 newkeylen) +{ + return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, + newkey, newkeylen); +} + +int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) +{ + struct tcp_sock *tp = tcp_sk(sk); + int i; + + for (i = 0; i < tp->md5sig_info->entries4; i++) { + if (tp->md5sig_info->keys4[i].addr == addr) { + /* Free the key */ + kfree(tp->md5sig_info->keys4[i].key); + tp->md5sig_info->entries4--; + + if (tp->md5sig_info->entries4 == 0) { + kfree(tp->md5sig_info->keys4); + tp->md5sig_info->keys4 = NULL; + } else if (tp->md5sig_info->entries4 != i) { + /* Need to do some manipulation */ + memcpy(&tp->md5sig_info->keys4[i], + &tp->md5sig_info->keys4[i+1], + (tp->md5sig_info->entries4 - i) * + sizeof(struct tcp4_md5sig_key)); + } + tcp_free_md5sig_pool(); + return 0; + } + } + return -ENOENT; +} + +EXPORT_SYMBOL(tcp_v4_md5_do_del); + +static void tcp_v4_clear_md5_list(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* Free each key, then the set of key keys, + * the crypto element, and then decrement our + * hold on the last resort crypto. + */ + if (tp->md5sig_info->entries4) { + int i; + for (i = 0; i < tp->md5sig_info->entries4; i++) + kfree(tp->md5sig_info->keys4[i].key); + tp->md5sig_info->entries4 = 0; + tcp_free_md5sig_pool(); + } + if (tp->md5sig_info->keys4) { + kfree(tp->md5sig_info->keys4); + tp->md5sig_info->keys4 = NULL; + tp->md5sig_info->alloced4 = 0; + } +} + +static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, + int optlen) +{ + struct tcp_md5sig cmd; + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; + u8 *newkey; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + if (copy_from_user(&cmd, optval, sizeof(cmd))) + return -EFAULT; + + if (sin->sin_family != AF_INET) + return -EINVAL; + + if (!cmd.tcpm_key || !cmd.tcpm_keylen) { + if (!tcp_sk(sk)->md5sig_info) + return -ENOENT; + return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); + } + + if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) + return -EINVAL; + + if (!tcp_sk(sk)->md5sig_info) { + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return -EINVAL; + + tp->md5sig_info = p; + + } + + newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + if (!newkey) + return -ENOMEM; + return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, + newkey, cmd.tcpm_keylen); +} + +static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + __be32 saddr, __be32 daddr, + struct tcphdr *th, int protocol, + int tcplen) +{ + struct scatterlist sg[4]; + __u16 data_len; + int block = 0; + __sum16 old_checksum; + struct tcp_md5sig_pool *hp; + struct tcp4_pseudohdr *bp; + struct hash_desc *desc; + int err; + unsigned int nbytes = 0; + + /* + * Okay, so RFC2385 is turned on for this connection, + * so we need to generate the MD5 hash for the packet now. + */ + + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + + bp = &hp->md5_blk.ip4; + desc = &hp->md5_desc; + + /* + * 1. the TCP pseudo-header (in the order: source IP address, + * destination IP address, zero-padded protocol number, and + * segment length) + */ + bp->saddr = saddr; + bp->daddr = daddr; + bp->pad = 0; + bp->protocol = protocol; + bp->len = htons(tcplen); + sg_set_buf(&sg[block++], bp, sizeof(*bp)); + nbytes += sizeof(*bp); + + /* 2. the TCP header, excluding options, and assuming a + * checksum of zero/ + */ + old_checksum = th->check; + th->check = 0; + sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); + nbytes += sizeof(struct tcphdr); + + /* 3. the TCP segment data (if any) */ + data_len = tcplen - (th->doff << 2); + if (data_len > 0) { + unsigned char *data = (unsigned char *)th + (th->doff << 2); + sg_set_buf(&sg[block++], data, data_len); + nbytes += data_len; + } + + /* 4. an independently-specified key or password, known to both + * TCPs and presumably connection-specific + */ + sg_set_buf(&sg[block++], key->key, key->keylen); + nbytes += key->keylen; + + /* Now store the Hash into the packet */ + err = crypto_hash_init(desc); + if (err) + goto clear_hash; + err = crypto_hash_update(desc, sg, nbytes); + if (err) + goto clear_hash; + err = crypto_hash_final(desc, md5_hash); + if (err) + goto clear_hash; + + /* Reset header, and free up the crypto */ + tcp_put_md5sig_pool(); + th->check = old_checksum; + +out: + return 0; +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + goto out; +} + +int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, + struct dst_entry *dst, + struct request_sock *req, + struct tcphdr *th, int protocol, + int tcplen) +{ + __be32 saddr, daddr; + + if (sk) { + saddr = inet_sk(sk)->saddr; + daddr = inet_sk(sk)->daddr; + } else { + struct rtable *rt = (struct rtable *)dst; + BUG_ON(!rt); + saddr = rt->rt_src; + daddr = rt->rt_dst; + } + return tcp_v4_do_calc_md5_hash(md5_hash, key, + saddr, daddr, + th, protocol, tcplen); +} + +EXPORT_SYMBOL(tcp_v4_calc_md5_hash); + +static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) +{ + /* + * This gets called for each TCP segment that arrives + * so we want to be efficient. + * We have 3 drop cases: + * o No MD5 hash and one expected. + * o MD5 hash and we're not expecting one. + * o MD5 hash and its wrong. + */ + __u8 *hash_location = NULL; + struct tcp_md5sig_key *hash_expected; + struct iphdr *iph = skb->nh.iph; + struct tcphdr *th = skb->h.th; + int length = (th->doff << 2) - sizeof(struct tcphdr); + int genhash; + unsigned char *ptr; + unsigned char newhash[16]; + + hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + + /* + * If the TCP option length is less than the TCP_MD5SIG + * option length, then we can shortcut + */ + if (length < TCPOLEN_MD5SIG) { + if (hash_expected) + return 1; + else + return 0; + } + + /* Okay, we can't shortcut - we have to grub through the options */ + ptr = (unsigned char *)(th + 1); + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + goto done_opts; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2) + goto done_opts; + if (opsize > length) + goto done_opts; + + if (opcode == TCPOPT_MD5SIG) { + hash_location = ptr; + goto done_opts; + } + } + ptr += opsize-2; + length -= opsize; + } +done_opts: + /* We've parsed the options - do we have a hash? */ + if (!hash_expected && !hash_location) + return 0; + + if (hash_expected && !hash_location) { + LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " + "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); + return 1; + } + + if (!hash_expected && hash_location) { + LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " + "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); + return 1; + } + + /* Okay, so this is hash_expected and hash_location - + * so we need to calculate the checksum. + */ + genhash = tcp_v4_do_calc_md5_hash(newhash, + hash_expected, + iph->saddr, iph->daddr, + th, sk->sk_protocol, + skb->len); + + if (genhash || memcmp(hash_location, newhash, 16) != 0) { + if (net_ratelimit()) { + printk(KERN_INFO "MD5 Hash failed for " + "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest), + genhash ? " tcp_v4_calc_md5_hash failed" : ""); + } + return 1; + } + return 0; +} + +#endif + +struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), .rtx_syn_ack = tcp_v4_send_synack, @@ -724,9 +1233,16 @@ struct request_sock_ops tcp_request_sock_ops = { .send_reset = tcp_v4_send_reset, }; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { + .md5_lookup = tcp_v4_reqsk_md5_lookup, +}; +#endif + static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -774,6 +1290,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (!req) goto drop; +#ifdef CONFIG_TCP_MD5SIG + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; +#endif + tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = 536; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; @@ -859,7 +1379,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; } - isn = tcp_v4_init_sequence(sk, skb); + isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; @@ -892,6 +1412,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; +#endif if (sk_acceptq_is_full(sk)) goto exit_overflow; @@ -926,6 +1449,22 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(newsk); +#ifdef CONFIG_TCP_MD5SIG + /* Copy over the MD5 key from the original socket */ + if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { + /* + * We're using one, so create a matching key + * on the newsk structure. If we fail to get + * memory, then we end up not copying the key + * across. Shucks. + */ + char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); + if (newkey != NULL) + tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, + newkey, key->keylen); + } +#endif + __inet_hash(&tcp_hashinfo, newsk, 0); __inet_inherit_port(&tcp_hashinfo, sk, newsk); @@ -971,7 +1510,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -static int tcp_v4_checksum_init(struct sk_buff *skb) +static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, @@ -1001,10 +1540,24 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) */ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { + struct sock *rsk; +#ifdef CONFIG_TCP_MD5SIG + /* + * We really want to reject the packet as early as possible + * if: + * o We're expecting an MD5'd packet and this is no MD5 tcp option + * o There is an MD5 option and we're not expecting one + */ + if (tcp_v4_inbound_md5_hash(sk, skb)) + goto discard; +#endif + if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) { + rsk = sk; goto reset; + } TCP_CHECK_TIMER(sk); return 0; } @@ -1018,20 +1571,24 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - if (tcp_child_process(sk, nsk, skb)) + if (tcp_child_process(sk, nsk, skb)) { + rsk = nsk; goto reset; + } return 0; } } TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) { + rsk = sk; goto reset; + } TCP_CHECK_TIMER(sk); return 0; reset: - tcp_v4_send_reset(skb); + tcp_v4_send_reset(rsk, skb); discard: kfree_skb(skb); /* Be careful here. If this function gets more complicated and @@ -1140,7 +1697,7 @@ no_tcp_socket: bad_packet: TCP_INC_STATS_BH(TCP_MIB_INERRS); } else { - tcp_v4_send_reset(skb); + tcp_v4_send_reset(NULL, skb); } discard_it: @@ -1263,6 +1820,15 @@ struct inet_connection_sock_af_ops ipv4_specific = { #endif }; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { + .md5_lookup = tcp_v4_md5_lookup, + .calc_md5_hash = tcp_v4_calc_md5_hash, + .md5_add = tcp_v4_md5_add_func, + .md5_parse = tcp_v4_parse_md5_keys, +}; +#endif + /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ @@ -1302,6 +1868,9 @@ static int tcp_v4_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv4_specific; icsk->icsk_sync_mss = tcp_sync_mss; +#ifdef CONFIG_TCP_MD5SIG + tp->af_specific = &tcp_sock_ipv4_specific; +#endif sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1325,6 +1894,15 @@ int tcp_v4_destroy_sock(struct sock *sk) /* Cleans up our, hopefully empty, out_of_order_queue. */ __skb_queue_purge(&tp->out_of_order_queue); +#ifdef CONFIG_TCP_MD5SIG + /* Clean up the MD5 key list, if any */ + if (tp->md5sig_info) { + tcp_v4_clear_md5_list(sk); + kfree(tp->md5sig_info); + tp->md5sig_info = NULL; + } +#endif + #ifdef CONFIG_NET_DMA /* Cleans up our sk_async_wait_queue */ __skb_queue_purge(&sk->sk_async_wait_queue); @@ -1385,7 +1963,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; - icsk = inet_csk(st->syn_wait_sk); + icsk = inet_csk(st->syn_wait_sk); req = req->dl_next; while (1) { while (req) { @@ -1395,7 +1973,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } - if (++st->sbucket >= TCP_SYNQ_HSIZE) + if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; @@ -1543,7 +2121,7 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos) while (rc && pos) { rc = established_get_next(seq, rc); --pos; - } + } return rc; } @@ -1672,7 +2250,7 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo) afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; afinfo->seq_fops->release = seq_release_private; - + p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; @@ -1686,7 +2264,7 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) if (!afinfo) return; proc_net_remove(afinfo->name); - memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); + memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } static void get_openreq4(struct sock *sk, struct request_sock *req, @@ -1721,8 +2299,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); struct inet_sock *inet = inet_sk(sp); - unsigned int dest = inet->daddr; - unsigned int src = inet->rcv_saddr; + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); @@ -1744,7 +2322,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) "%08X %5d %8d %lu %d %p %u %u %u %u %d", i, src, srcp, dest, destp, sp->sk_state, tp->write_seq - tp->snd_una, - (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), + sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog : + (tp->rcv_nxt - tp->copied_seq), timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -1759,7 +2338,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } -static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) +static void get_timewait4_sock(struct inet_timewait_sock *tw, + char *tmpbuf, int i) { __be32 dest, src; __u16 destp, srcp; @@ -1872,7 +2452,8 @@ struct proto tcp_prot = { void __init tcp_v4_init(struct net_proto_family *ops) { - if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0) + if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, + IPPROTO_TCP) < 0) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0163d982690..4a3889dd194 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -45,8 +45,7 @@ struct inet_timewait_death_row tcp_death_row = { .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&tcp_death_row), .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, - inet_twdr_twkill_work, - &tcp_death_row), + inet_twdr_twkill_work), /* Short-time timewait calendar */ .twcal_hand = -1, @@ -306,6 +305,28 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = np->ipv6only; } #endif + +#ifdef CONFIG_TCP_MD5SIG + /* + * The timewait bucket does not have the key DB from the + * sock structure. We just make a quick copy of the + * md5 key being used (if indeed we are using one) + * so the timewait ack generating code has the key. + */ + do { + struct tcp_md5sig_key *key; + memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key)); + tcptw->tw_md5_keylen = 0; + key = tp->af_specific->md5_lookup(sk, sk); + if (key != NULL) { + memcpy(&tcptw->tw_md5_key, key->key, key->keylen); + tcptw->tw_md5_keylen = key->keylen; + if (tcp_alloc_md5sig_pool() == NULL) + BUG(); + } + } while(0); +#endif + /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); @@ -329,14 +350,24 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - if (net_ratelimit()) - printk(KERN_INFO "TCP: time wait bucket table overflow\n"); + LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); } tcp_update_metrics(sk); tcp_done(sk); } +void tcp_twsk_destructor(struct sock *sk) +{ +#ifdef CONFIG_TCP_MD5SIG + struct tcp_timewait_sock *twsk = tcp_twsk(sk); + if (twsk->tw_md5_keylen) + tcp_put_md5sig_pool(); +#endif +} + +EXPORT_SYMBOL_GPL(tcp_twsk_destructor); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -435,6 +466,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } +#ifdef CONFIG_TCP_MD5SIG + newtp->md5sig_info = NULL; /*XXX*/ + if (newtp->af_specific->md5_lookup(sk, newsk)) + newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; +#endif if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; @@ -455,7 +491,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock **prev) { struct tcphdr *th = skb->h.th; - u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); + __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; struct tcp_options_received tmp_opt; struct sock *child; @@ -617,6 +653,30 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, req, NULL); if (child == NULL) goto listen_overflow; +#ifdef CONFIG_TCP_MD5SIG + else { + /* Copy over the MD5 key from the original socket */ + struct tcp_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + key = tp->af_specific->md5_lookup(sk, child); + if (key != NULL) { + /* + * We're using one, so create a matching key on the + * newsk structure. If we fail to get memory then we + * end up not copying the key across. Shucks. + */ + char *newkey = kmemdup(key->key, key->keylen, + GFP_ATOMIC); + if (newkey) { + if (!tcp_alloc_md5sig_pool()) + BUG(); + tp->af_specific->md5_add(child, child, + newkey, + key->keylen); + } + } + } +#endif inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); @@ -633,7 +693,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, embryonic_reset: NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_reset(skb); + req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ca406157724..32c1a972fa3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -270,7 +270,7 @@ static u16 tcp_select_window(struct sock *sk) } static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, - __u32 tstamp) + __u32 tstamp, __u8 **md5_hash) { if (tp->rx_opt.tstamp_ok) { *ptr++ = htonl((TCPOPT_NOP << 24) | @@ -298,16 +298,29 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, tp->rx_opt.eff_sacks--; } } +#ifdef CONFIG_TCP_MD5SIG + if (md5_hash) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + *md5_hash = (__u8 *)ptr; + } +#endif } /* Construct a tcp options header for a SYN or SYN_ACK packet. * If this is every changed make sure to change the definition of * MAX_SYN_SIZE to match the new maximum number of options that you * can generate. + * + * Note - that with the RFC2385 TCP option, we make room for the + * 16 byte MD5 hash. This will be filled in later, so the pointer for the + * location to be filled is passed back up. */ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, int offer_wscale, int wscale, __u32 tstamp, - __u32 ts_recent) + __u32 ts_recent, __u8 **md5_hash) { /* We always get an MSS option. * The option bytes which will be seen in normal data @@ -346,6 +359,20 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); +#ifdef CONFIG_TCP_MD5SIG + /* + * If MD5 is enabled, then we set the option, and include the size + * (always 18). The actual MD5 hash is added just before the + * packet is sent. + */ + if (md5_hash) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + *md5_hash = (__u8 *) ptr; + } +#endif } /* This routine actually transmits TCP packets queued in by @@ -366,6 +393,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_sock *tp; struct tcp_skb_cb *tcb; int tcp_header_size; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *md5; + __u8 *md5_hash_location; +#endif struct tcphdr *th; int sysctl_flags; int err; @@ -424,9 +455,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); +#ifdef CONFIG_TCP_MD5SIG + /* + * Are we doing MD5 on this segment? If so - make + * room for it. + */ + md5 = tp->af_specific->md5_lookup(sk, sk); + if (md5) + tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; +#endif + th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; - skb_set_owner_w(skb, sk); /* Build TCP header and checksum it. */ th->source = inet->sport; @@ -461,13 +501,34 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, (sysctl_flags & SYSCTL_FLAG_WSCALE), tp->rx_opt.rcv_wscale, tcb->when, - tp->rx_opt.ts_recent); + tp->rx_opt.ts_recent, + +#ifdef CONFIG_TCP_MD5SIG + md5 ? &md5_hash_location : +#endif + NULL); } else { tcp_build_and_update_options((__be32 *)(th + 1), - tp, tcb->when); + tp, tcb->when, +#ifdef CONFIG_TCP_MD5SIG + md5 ? &md5_hash_location : +#endif + NULL); TCP_ECN_send(sk, tp, skb, tcp_header_size); } +#ifdef CONFIG_TCP_MD5SIG + /* Calculate the MD5 hash, as we have all we need now */ + if (md5) { + tp->af_specific->calc_md5_hash(md5_hash_location, + md5, + sk, NULL, NULL, + skb->h.th, + sk->sk_protocol, + skb->len); + } +#endif + icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) @@ -479,19 +540,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); if (likely(err <= 0)) return err; tcp_enter_cwr(sk); - /* NET_XMIT_CN is special. It does not guarantee, - * that this packet is lost. It tells that device - * is about to start to drop packets or already - * drops some packets of the same priority and - * invokes us to send less aggressively. - */ - return err == NET_XMIT_CN ? 0 : err; + return net_xmit_eval(err); #undef SYSCTL_FLAG_TSTAMPS #undef SYSCTL_FLAG_WSCALE @@ -847,6 +902,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); +#ifdef CONFIG_TCP_MD5SIG + if (tp->af_specific->md5_lookup(sk, sk)) + mss_now -= TCPOLEN_MD5SIG_ALIGNED; +#endif + xmit_size_goal = mss_now; if (doing_tso) { @@ -2040,6 +2100,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcphdr *th; int tcp_header_size; struct sk_buff *skb; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *md5; + __u8 *md5_hash_location; +#endif skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (skb == NULL) @@ -2055,6 +2119,13 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + /* SACK_PERM is in the place of NOP NOP of TS */ ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + +#ifdef CONFIG_TCP_MD5SIG + /* Are we doing MD5 on this segment? If so - make room for it */ + md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); + if (md5) + tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; +#endif skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); memset(th, 0, sizeof(struct tcphdr)); @@ -2092,11 +2163,29 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, TCP_SKB_CB(skb)->when, - req->ts_recent); + req->ts_recent, + ( +#ifdef CONFIG_TCP_MD5SIG + md5 ? &md5_hash_location : +#endif + NULL) + ); skb->csum = 0; th->doff = (tcp_header_size >> 2); TCP_INC_STATS(TCP_MIB_OUTSEGS); + +#ifdef CONFIG_TCP_MD5SIG + /* Okay, we have all we need - do the md5 hash if needed */ + if (md5) { + tp->af_specific->calc_md5_hash(md5_hash_location, + md5, + NULL, dst, req, + skb->h.th, sk->sk_protocol, + skb->len); + } +#endif + return skb; } @@ -2115,6 +2204,11 @@ static void tcp_connect_init(struct sock *sk) tp->tcp_header_len = sizeof(struct tcphdr) + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); +#ifdef CONFIG_TCP_MD5SIG + if (tp->af_specific->md5_lookup(sk, sk) != NULL) + tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; +#endif + /* If user gave his TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 4be336f1788..f230eeecf09 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -156,6 +156,8 @@ static __init int tcpprobe_init(void) init_waitqueue_head(&tcpw.wait); spin_lock_init(&tcpw.lock); tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); + if (IS_ERR(tcpw.fifo)) + return PTR_ERR(tcpw.fifo); if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) goto err0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index fb09ade5897..3355c276b61 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -297,7 +297,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (net_ratelimit()) { struct inet_sock *inet = inet_sk(sk); printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", - NIPQUAD(inet->daddr), htons(inet->dport), + NIPQUAD(inet->daddr), ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #endif diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a3b7aa015a2..ddc4bcc5785 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -42,8 +42,8 @@ * with V_PARAM_SHIFT bits to the right of the binary point. */ #define V_PARAM_SHIFT 1 -static int alpha = 1<<V_PARAM_SHIFT; -static int beta = 3<<V_PARAM_SHIFT; +static int alpha = 2<<V_PARAM_SHIFT; +static int beta = 4<<V_PARAM_SHIFT; static int gamma = 1<<V_PARAM_SHIFT; module_param(alpha, int, 0644); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 865d75214a9..035915fc9ed 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -92,22 +92,16 @@ #include <linux/timer.h> #include <linux/mm.h> #include <linux/inet.h> -#include <linux/ipv6.h> #include <linux/netdevice.h> -#include <net/snmp.h> -#include <net/ip.h> #include <net/tcp_states.h> -#include <net/protocol.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <net/sock.h> -#include <net/udp.h> #include <net/icmp.h> #include <net/route.h> -#include <net/inet_common.h> #include <net/checksum.h> #include <net/xfrm.h> +#include "udp_impl.h" /* * Snmp MIB for the UDP layer @@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -static inline int udp_lport_inuse(u16 num) +static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) if (inet_sk(sk)->num == num) return 1; return 0; } /** - * udp_get_port - common port lookup for IPv4 and IPv6 + * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up + * @udptable: hash list table, must be of UDP_HTABLE_SIZE + * @port_rover: pointer to record of last unallocated port * @saddr_comp: AF-dependent comparison of bound local IP addresses */ -int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) +int __udp_lib_get_port(struct sock *sk, unsigned short snum, + struct hlist_head udptable[], int *port_rover, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2 ) ) { struct hlist_node *node; struct hlist_head *head; @@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, if (snum == 0) { int best_size_so_far, best, result, i; - if (udp_port_rover > sysctl_local_port_range[1] || - udp_port_rover < sysctl_local_port_range[0]) - udp_port_rover = sysctl_local_port_range[0]; + if (*port_rover > sysctl_local_port_range[1] || + *port_rover < sysctl_local_port_range[0]) + *port_rover = sysctl_local_port_range[0]; best_size_so_far = 32767; - best = result = udp_port_rover; + best = result = *port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) + if (! __udp_lib_lport_inuse(result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) goto fail; gotit: - udp_port_rover = snum = result; + *port_rover = snum = result; } else { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) if (inet_sk(sk2)->num == snum && @@ -195,12 +193,12 @@ gotit: (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_cmp)(sk, sk2) ) + (*saddr_comp)(sk, sk2) ) goto fail; } inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -210,7 +208,13 @@ fail: return error; } -static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +__inline__ int udp_get_port(struct sock *sk, unsigned short snum, + int (*scmp)(const struct sock *, const struct sock *)) +{ + return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); +} + +inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); } - -static void udp_v4_hash(struct sock *sk) -{ - BUG(); -} - -static void udp_v4_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_dec_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); -} - /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); int badness = -1; - sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { @@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, } } } - return result; -} - -static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) -{ - struct sock *sk; - - read_lock(&udp_hash_lock); - sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); - if (sk) - sock_hold(sk); + if (result) + sock_hold(result); read_unlock(&udp_hash_lock); - return sk; + return result; } static inline struct sock *udp_v4_mcast_next(struct sock *sk, @@ -340,7 +320,7 @@ found: * to find the appropriate port. */ -void udp_err(struct sk_buff *skb, u32 info) +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info) int harderr; int err; - sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, + skb->dev->ifindex, udptable ); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -405,6 +386,11 @@ out: sock_put(sk); } +__inline__ void udp_err(struct sk_buff *skb, u32 info) +{ + return __udp4_lib_err(skb, info, udp_hash); +} + /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -419,16 +405,58 @@ static void udp_flush_pending_frames(struct sock *sk) } } +/** + * udp4_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, int len ) +{ + unsigned int offset; + struct udphdr *uh = skb->h.uh; + __wsum csum = 0; + + if (skb_queue_len(&sk->sk_write_queue) == 1) { + /* + * Only one fragment on the socket. + */ + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + } else { + /* + * HW-checksum won't work as there are two or more + * fragments on the socket so that all csums of sk_buffs + * should be together + */ + offset = skb->h.raw - skb->data; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + + skb->ip_summed = CHECKSUM_NONE; + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + + uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) +static int udp_push_pending_frames(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; struct sk_buff *skb; struct udphdr *uh; int err = 0; + __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) @@ -443,52 +471,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->len = htons(up->len); uh->check = 0; - if (sk->sk_no_check == UDP_CSUM_NOXMIT) { + if (up->pcflag) /* UDP-Lite */ + csum = udplite_csum_outgoing(sk, skb); + + else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; goto send; - } - if (skb_queue_len(&sk->sk_write_queue) == 1) { - /* - * Only one fragment on the socket. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb->csum = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, 0); - } else { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, skb->csum); - if (uh->check == 0) - uh->check = -1; - } - } else { - unsigned int csum = 0; - /* - * HW-checksum won't work as there are two or more - * fragments on the socket so that all csums of sk_buffs - * should be together. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - int offset = (unsigned char *)uh - skb->data; - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - skb->ip_summed = CHECKSUM_NONE; - } else { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - } + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + goto send; + + } else /* `normal' UDP */ + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + sk->sk_protocol, csum ); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, - up->len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = -1; - } send: err = ip_push_pending_frames(sk); out: @@ -497,12 +501,6 @@ out: return err; } - -static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) -{ - return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); -} - int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -516,8 +514,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, __be32 daddr, faddr, saddr; __be16 dport; u8 tos; - int err; + int err, is_udplite = up->pcflag; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); if (len > 0xFFFF) return -EMSGSIZE; @@ -622,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .daddr = faddr, .saddr = saddr, .tos = tos } }, - .proto = IPPROTO_UDP, + .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; @@ -668,13 +667,14 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); else if (!corkreq) - err = udp_push_pending_frames(sk, up); + err = udp_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; release_sock(sk); @@ -684,7 +684,7 @@ out: if (free) kfree(ipc.opt); if (!err) { - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return len; } /* @@ -695,7 +695,7 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -707,8 +707,8 @@ do_confirm: goto out; } -static int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct udp_sock *up = udp_sk(sk); int ret; @@ -747,7 +747,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, up->len += size; if (!(up->corkflag || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk, up); + ret = udp_push_pending_frames(sk); if (!ret) ret = size; out: @@ -795,29 +795,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return(0); } -static __inline__ int __udp_checksum_complete(struct sk_buff *skb) -{ - return __skb_checksum_complete(skb); -} - -static __inline__ int udp_checksum_complete(struct sk_buff *skb) -{ - return skb->ip_summed != CHECKSUM_UNNECESSARY && - __udp_checksum_complete(skb); -} - /* * This should be easy, if there is something there we * return it, otherwise we block. */ -static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - int copied, err; + int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); /* * Check any passed addresses @@ -839,15 +828,25 @@ try_again: msg->msg_flags |= MSG_TRUNC; } - if (skb->ip_summed==CHECKSUM_UNNECESSARY) { - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else if (msg->msg_flags&MSG_TRUNC) { - if (__udp_checksum_complete(skb)) + /* + * Decide whether to checksum and/or copy data. + * + * UDP: checksum may have been computed in HW, + * (re-)compute it if message is truncated. + * UDP-Lite: always needs to checksum, no HW support. + */ + copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); + + if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { + if (__udp_lib_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else { + copy_only = 1; + } + + if (copy_only) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -880,7 +879,7 @@ out: return err; csum_copy_err: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); skb_kill_datagram(sk, skb, flags); @@ -912,11 +911,6 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } -static void udp_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - /* return: * 1 if the the UDP system should process it * 0 if we should drop this packet @@ -928,23 +922,32 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) return 1; #else struct udp_sock *up = udp_sk(sk); - struct udphdr *uh = skb->h.uh; + struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); - __be32 *udpdata32 = (__be32 *)udpdata; + __u8 *udpdata; + __be32 *udpdata32; __u16 encap_type = up->encap_type; /* if we're overly short, let UDP handle it */ - if (udpdata > skb->tail) + len = skb->len - sizeof(struct udphdr); + if (len <= 0) return 1; /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; - len = skb->tail - udpdata; + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = skb->h.uh; + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; switch (encap_type) { default: @@ -1013,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; @@ -1021,10 +1024,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* * Charge it to the socket, dropping if the queue is full. */ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return -1; - } + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; nf_reset(skb); if (up->encap_type) { @@ -1048,31 +1049,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (ret < 0) { /* process the ESP packet */ ret = xfrm4_rcv_encap(skb, up->encap_type); - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return -ret; } /* FALLTHROUGH -- it's a UDP Packet */ } - if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { - if (__udp_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return -1; + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are + * disabled for the following two types of errors: these depend + * on the application settings, not on the functioning of the + * protocol stack as such. + * + * RFC 3828 here recommends (sec 3.3): "There should also be a + * way ... to ... at least let the receiving application block + * delivery of packets with coverage values less than a value + * provided by the application." + */ + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " + "%d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; } + /* The next case involves violating the min. coverage requested + * by the receiver. This is subtle: if receiver wants x and x is + * greater than the buffersize/MTU then receiver will complain + * that it wants x while sender emits packets of smaller size y. + * Therefore the above ...()->partial_cov statement is essential. + */ + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING + "UDPLITE: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { + if (__udp_lib_checksum_complete(skb)) + goto drop; skb->ip_summed = CHECKSUM_UNNECESSARY; } if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return -1; + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + goto drop; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return 0; + +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + kfree_skb(skb); + return -1; } /* @@ -1081,14 +1119,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, - __be32 saddr, __be32 daddr) +static int __udp4_lib_mcast_deliver(struct sk_buff *skb, + struct udphdr *uh, + __be32 saddr, __be32 daddr, + struct hlist_head udptable[]) { struct sock *sk; int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1122,65 +1162,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * Otherwise, csum completion requires chacksumming packet body, * including udp header and folding it to skb->csum. */ -static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) +static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) + if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len, IPPROTO_UDP, skb->csum )) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); + skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, + skb->nh.iph->daddr, + skb->len, IPPROTO_UDP, 0); /* Probably, we should checksum udp header (it should be in cache * in any case) and data in tiny packets (< rx copybreak). */ + + /* UDP = UDP-Lite with a non-partial checksum coverage */ + UDP_SKB_CB(skb)->partial_cov = 0; } /* * All we need to do is get the socket, and then do a checksum. */ -int udp_rcv(struct sk_buff *skb) +int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int is_udplite) { struct sock *sk; - struct udphdr *uh; + struct udphdr *uh = skb->h.uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = skb->nh.iph->saddr; __be32 daddr = skb->nh.iph->daddr; - int len = skb->len; /* - * Validate the packet and the UDP length. + * Validate the packet. */ if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto no_header; - - uh = skb->h.uh; + goto drop; /* No space for header. */ ulen = ntohs(uh->len); - - if (ulen > len || ulen < sizeof(*uh)) + if (ulen > skb->len) goto short_packet; - if (pskb_trim_rcsum(skb, ulen)) - goto short_packet; + if(! is_udplite ) { /* UDP validates ulen. */ + + if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + goto short_packet; - udp_checksum_init(skb, uh, ulen, saddr, daddr); + udp4_csum_init(skb, uh); + + } else { /* UDP-Lite validates cscov. */ + if (udplite4_csum_init(skb, uh)) + goto csum_error; + } if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return udp_v4_mcast_deliver(skb, uh, saddr, daddr); + return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); + sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, + skb->dev->ifindex, udptable ); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but - * it it wants the return to be -protocol, or 0 + * it wants the return to be -protocol, or 0 */ if (ret > 0) return -ret; @@ -1192,10 +1242,10 @@ int udp_rcv(struct sk_buff *skb) nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ - if (udp_checksum_complete(skb)) + if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS); + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1206,36 +1256,40 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + is_udplite? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), ulen, - len, + skb->len, NIPQUAD(daddr), ntohs(uh->dest)); -no_header: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return(0); + goto drop; csum_error: /* * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + is_udplite? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), NIPQUAD(daddr), ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return(0); } -static int udp_destroy_sock(struct sock *sk) +__inline__ int udp_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udp_hash, 0); +} + +int udp_destroy_sock(struct sock *sk) { lock_sock(sk); udp_flush_pending_frames(sk); @@ -1246,8 +1300,9 @@ static int udp_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -static int do_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udp_lib_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen, + int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); int val; @@ -1266,7 +1321,7 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, } else { up->corkflag = 0; lock_sock(sk); - udp_push_pending_frames(sk, up); + (*push_pending_frames)(sk); release_sock(sk); } break; @@ -1284,6 +1339,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, } break; + /* + * UDP-Lite's partial checksum coverage (RFC 3828). + */ + /* The sender sets actual checksum coverage length via this option. + * The case coverage > packet length is handled by send module. */ + case UDPLITE_SEND_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ + val = 8; + up->pcslen = val; + up->pcflag |= UDPLITE_SEND_CC; + break; + + /* The receiver specifies a minimum checksum coverage value. To make + * sense, this should be set to at least 8 (as done below). If zero is + * used, this again means full checksum coverage. */ + case UDPLITE_RECV_CSCOV: + if (!up->pcflag) /* Disable the option on UDP sockets */ + return -ENOPROTOOPT; + if (val != 0 && val < 8) /* Avoid silly minimal values. */ + val = 8; + up->pcrlen = val; + up->pcflag |= UDPLITE_RECV_CC; + break; + default: err = -ENOPROTOOPT; break; @@ -1292,26 +1373,28 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, return err; } -static int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return ip_setsockopt(sk, level, optname, optval, optlen); - return do_udp_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return ip_setsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return compat_ip_setsockopt(sk, level, optname, optval, optlen); - return do_udp_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return compat_ip_setsockopt(sk, level, optname, optval, optlen); } #endif -static int do_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udp_lib_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { struct udp_sock *up = udp_sk(sk); int val, len; @@ -1333,6 +1416,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + /* The following two cannot be changed on UDP sockets, the return is + * always 0 (which corresponds to the full checksum coverage of UDP). */ + case UDPLITE_SEND_CSCOV: + val = up->pcslen; + break; + + case UDPLITE_RECV_CSCOV: + val = up->pcrlen; + break; + default: return -ENOPROTOOPT; }; @@ -1344,21 +1437,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, return 0; } -static int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return ip_getsockopt(sk, level, optname, optval, optlen); - return do_udp_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ip_getsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udp_getsockopt(struct sock *sk, int level, int optname, +int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return compat_ip_getsockopt(sk, level, optname, optval, optlen); - return do_udp_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ip_getsockopt(sk, level, optname, optval, optlen); } #endif /** @@ -1378,7 +1471,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - + int is_lite = IS_UDPLITE(sk); + /* Check for false positives due to checksum errors */ if ( (mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && @@ -1388,8 +1482,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL) { - if (udp_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS); + if (udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } else { @@ -1411,7 +1505,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, - .close = udp_close, + .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, @@ -1422,8 +1516,8 @@ struct proto udp_prot = { .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_v4_hash, - .unhash = udp_v4_unhash, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), #ifdef CONFIG_COMPAT @@ -1442,7 +1536,7 @@ static struct sock *udp_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, &udp_hash[state->bucket]) { + sk_for_each(sk, node, state->hashtable + state->bucket) { if (sk->sk_family == state->family) goto found; } @@ -1463,7 +1557,7 @@ try_again: } while (sk && sk->sk_family != state->family); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { - sk = sk_head(&udp_hash[state->bucket]); + sk = sk_head(state->hashtable + state->bucket); goto try_again; } return sk; @@ -1513,6 +1607,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) if (!s) goto out; s->family = afinfo->family; + s->hashtable = afinfo->hashtable; s->seq_ops.start = udp_seq_start; s->seq_ops.next = udp_seq_next; s->seq_ops.show = afinfo->seq_show; @@ -1579,7 +1674,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) atomic_read(&sp->sk_refcnt), sp); } -static int udp4_seq_show(struct seq_file *seq, void *v) +int udp4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -1602,6 +1697,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { .owner = THIS_MODULE, .name = "udp", .family = AF_INET, + .hashtable = udp_hash, .seq_show = udp4_seq_show, .seq_fops = &udp4_seq_fops, }; @@ -1624,6 +1720,8 @@ EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); +EXPORT_SYMBOL(udp_lib_getsockopt); +EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h new file mode 100644 index 00000000000..f6f4277ba6d --- /dev/null +++ b/net/ipv4/udp_impl.h @@ -0,0 +1,38 @@ +#ifndef _UDP4_IMPL_H +#define _UDP4_IMPL_H +#include <net/udp.h> +#include <net/udplite.h> +#include <net/protocol.h> +#include <net/inet_common.h> + +extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); + +extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, + struct hlist_head udptable[], int *port_rover, + int (*)(const struct sock*,const struct sock*)); +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); + + +extern int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); + +#ifdef CONFIG_COMPAT +extern int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +#endif +extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +extern int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags); +extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); +extern int udp_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int udp4_seq_show(struct seq_file *seq, void *v); +#endif +#endif /* _UDP4_IMPL_H */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c new file mode 100644 index 00000000000..b28fe1edf98 --- /dev/null +++ b/net/ipv4/udplite.c @@ -0,0 +1,119 @@ +/* + * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). + * + * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ + * + * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" +DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; + +struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +static int udplite_port_rover; + +int udplite_get_port(struct sock *sk, unsigned short p, + int (*c)(const struct sock *, const struct sock *)) +{ + return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); +} + +static int udplite_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +static int udplite_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udplite_hash, 1); +} + +static void udplite_err(struct sk_buff *skb, u32 info) +{ + return __udp4_lib_err(skb, info, udplite_hash); +} + +static struct net_protocol udplite_protocol = { + .handler = udplite_rcv, + .err_handler = udplite_err, + .no_policy = 1, +}; + +struct proto udplite_prot = { + .name = "UDP-Lite", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v4_get_port, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif +}; + +static struct inet_protosw udplite4_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplite_prot, + .ops = &inet_dgram_ops, + .capability = -1, + .no_check = 0, /* must checksum (RFC 3828) */ + .flags = INET_PROTOSW_PERMANENT, +}; + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite4_seq_fops; +static struct udp_seq_afinfo udplite4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite", + .family = AF_INET, + .hashtable = udplite_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udplite4_seq_fops, +}; +#endif + +void __init udplite4_register(void) +{ + if (proto_register(&udplite_prot, 1)) + goto out_register_err; + + if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) + goto out_unregister_proto; + + inet_register_protosw(&udplite4_protosw); + +#ifdef CONFIG_PROC_FS + if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ + printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); +#endif + return; + +out_unregister_proto: + proto_unregister(&udplite_prot); +out_register_err: + printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__); +} + +EXPORT_SYMBOL(udplite_hash); +EXPORT_SYMBOL(udplite_prot); +EXPORT_SYMBOL(udplite_get_port); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1bed0cdf53e..fb9f69c616f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -72,8 +72,8 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; - u32 remote = fl->fl4_dst; - u32 local = fl->fl4_src; + __be32 remote = fl->fl4_dst; + __be32 local = fl->fl4_src; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { @@ -199,11 +199,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u16 *ports = (u16 *)xprth; + __be16 *ports = (__be16 *)xprth; fl->fl_ip_sport = ports[0]; fl->fl_ip_dport = ports[1]; @@ -273,6 +274,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); + if (likely(xdst->u.rt.peer)) + inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 6e48f52e197..deb4101a2a8 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -196,10 +196,3 @@ config IPV6_SUBTREES If unsure, say N. -config IPV6_ROUTE_FWMARK - bool "IPv6: use netfilter MARK value as routing key" - depends on IPV6_MULTIPLE_TABLES && NETFILTER - ---help--- - If you say Y here, you will be able to specify different routes for - packets with different mark values (see iptables(8), MARK target). - diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index addcc011bc0..8bacda109b7 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,8 +5,8 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ - protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b312a5f7a75..9b0a9064315 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -232,7 +232,7 @@ static inline unsigned ipv6_addr_scope2type(unsigned scope) int __ipv6_addr_type(const struct in6_addr *addr) { - u32 st; + __be32 st; st = addr->s6_addr32[0]; @@ -1164,7 +1164,7 @@ record_it: int ipv6_get_saddr(struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr); + return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr); } @@ -3098,10 +3098,9 @@ static inline int rt_scope(int ifa_scope) static inline int inet6_ifaddr_msgsize(void) { - return nlmsg_total_size(sizeof(struct ifaddrmsg) + - nla_total_size(16) + - nla_total_size(sizeof(struct ifa_cacheinfo)) + - 128); + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16) /* IFA_ADDRESS */ + + nla_total_size(sizeof(struct ifa_cacheinfo)); } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, @@ -3329,10 +3328,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWADDR, 0); - if (err < 0) { - kfree_skb(skb); - goto errout_ifa; - } + /* failure implies BUG in inet6_ifaddr_msgsize() */ + BUG_ON(err < 0); err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); errout_ifa: @@ -3351,10 +3348,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in inet6_ifaddr_msgsize() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: @@ -3365,6 +3360,8 @@ errout: static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, __s32 *array, int bytes) { + BUG_ON(bytes < (DEVCONF_MAX * 4)); + memset(array, 0, bytes); array[DEVCONF_FORWARDING] = cnf->forwarding; array[DEVCONF_HOPLIMIT] = cnf->hop_limit; @@ -3397,80 +3394,76 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; } -/* Maximum length of ifinfomsg attributes */ -#define INET6_IFINFO_RTA_SPACE \ - RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \ - RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ + \ - RTA_SPACE(sizeof(u32)) /* MTU */ + \ - RTA_SPACE(sizeof(int)) /* LINK */ + \ - RTA_SPACE(0) /* PROTINFO */ + \ - RTA_SPACE(sizeof(u32)) /* FLAGS */ + \ - RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \ - RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */ +static inline size_t inet6_if_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size( /* IFLA_PROTINFO */ + nla_total_size(4) /* IFLA_INET6_FLAGS */ + + nla_total_size(sizeof(struct ifla_cacheinfo)) + + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + ); +} static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { - struct net_device *dev = idev->dev; - __s32 *array = NULL; - struct ifinfomsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - struct rtattr *subattr; - __u32 mtu = dev->mtu; - struct ifla_cacheinfo ci; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_INET6; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = 0; - - RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + struct net_device *dev = idev->dev; + struct nlattr *conf; + struct ifinfomsg *hdr; + struct nlmsghdr *nlh; + void *protoinfo; + struct ifla_cacheinfo ci; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -ENOBUFS; + + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_INET6; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); if (dev->addr_len) - RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); if (dev->ifindex != dev->iflink) - RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); - - subattr = (struct rtattr*)skb->tail; + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - RTA_PUT(skb, IFLA_PROTINFO, 0, NULL); + protoinfo = nla_nest_start(skb, IFLA_PROTINFO); + if (protoinfo == NULL) + goto nla_put_failure; - /* return the device flags */ - RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags); + NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - /* return interface cacheinfo */ ci.max_reasm_len = IPV6_MAXPLEN; ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); ci.reachable_time = idev->nd_parms->reachable_time; ci.retrans_time = idev->nd_parms->retrans_time; - RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - - /* return the device sysctl params */ - if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL) - goto rtattr_failure; - ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array)); - RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array); + NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); + + conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); + if (conf == NULL) + goto nla_put_failure; + ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf)); /* XXX - Statistics/MC not implemented */ - subattr->rta_len = skb->tail - (u8*)subattr; - nlh->nlmsg_len = skb->tail - b; - kfree(array); - return skb->len; + nla_nest_end(skb, protoinfo); + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - kfree(array); - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -3501,18 +3494,15 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE; int err = -ENOBUFS; - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in inet6_if_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: @@ -3520,22 +3510,26 @@ errout: rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); } -/* Maximum length of prefix_cacheinfo attributes */ -#define INET6_PREFIX_RTA_SPACE \ - RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \ - RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */ +static inline size_t inet6_prefix_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct prefixmsg)) + + nla_total_size(sizeof(struct in6_addr)) + + nla_total_size(sizeof(struct prefix_cacheinfo)); +} static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, - int event, unsigned int flags) + struct prefix_info *pinfo, u32 pid, u32 seq, + int event, unsigned int flags) { - struct prefixmsg *pmsg; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct prefixmsg *pmsg; + struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags); - pmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + pmsg = nlmsg_data(nlh); pmsg->prefix_family = AF_INET6; pmsg->prefix_pad1 = 0; pmsg->prefix_pad2 = 0; @@ -3543,44 +3537,37 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; pmsg->prefix_pad3 = 0; - pmsg->prefix_flags = 0; if (pinfo->onlink) pmsg->prefix_flags |= IF_PREFIX_ONLINK; if (pinfo->autoconf) pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; - RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); + NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); ci.preferred_time = ntohl(pinfo->prefered); ci.valid_time = ntohl(pinfo->valid); - RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); + NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE; int err = -ENOBUFS; - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in inet6_prefix_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: @@ -3669,8 +3656,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -3982,10 +3968,9 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf struct addrconf_sysctl_table *t; char *dev_name = NULL; - t = kmalloc(sizeof(*t), GFP_KERNEL); + t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) return; - memcpy(t, &addrconf_sysctl, sizeof(*t)); for (i=0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].de = NULL; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 858cae29581..e5cd83b2205 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> +#include <net/udplite.h> #include <net/tcp.h> #include <net/ipip.h> #include <net/protocol.h> @@ -221,7 +222,7 @@ lookup_protocol: * the user to assign a number at socket * creation time automatically shares. */ - inet->sport = ntohs(inet->num); + inet->sport = htons(inet->num); sk->sk_prot->hash(sk); } if (sk->sk_prot->init) { @@ -341,7 +342,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = ntohs(inet->num); + inet->sport = htons(inet->num); inet->dport = 0; inet->daddr = 0; out: @@ -678,7 +679,7 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && + ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || @@ -719,10 +720,8 @@ snmp6_mib_free(void *ptr[2]) { if (ptr == NULL) return; - if (ptr[0]) - free_percpu(ptr[0]); - if (ptr[1]) - free_percpu(ptr[1]); + free_percpu(ptr[0]); + free_percpu(ptr[1]); ptr[0] = ptr[1] = NULL; } @@ -737,8 +736,13 @@ static int __init init_ipv6_mibs(void) if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), __alignof__(struct udp_mib)) < 0) goto err_udp_mib; + if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib), + __alignof__(struct udp_mib)) < 0) + goto err_udplite_mib; return 0; +err_udplite_mib: + snmp6_mib_free((void **)udp_stats_in6); err_udp_mib: snmp6_mib_free((void **)icmpv6_statistics); err_icmp_mib: @@ -753,6 +757,7 @@ static void cleanup_ipv6_mibs(void) snmp6_mib_free((void **)ipv6_statistics); snmp6_mib_free((void **)icmpv6_statistics); snmp6_mib_free((void **)udp_stats_in6); + snmp6_mib_free((void **)udplite_stats_in6); } static int __init inet6_init(void) @@ -780,10 +785,14 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; - err = proto_register(&rawv6_prot, 1); + err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; + err = proto_register(&rawv6_prot, 1); + if (err) + goto out_unregister_udplite_proto; + /* Register the socket-side information for inet6_create. */ for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) @@ -837,6 +846,8 @@ static int __init inet6_init(void) goto proc_tcp6_fail; if (udp6_proc_init()) goto proc_udp6_fail; + if (udplite6_proc_init()) + goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; @@ -862,6 +873,7 @@ static int __init inet6_init(void) /* Init v6 transport protocols. */ udpv6_init(); + udplitev6_init(); tcpv6_init(); ipv6_packet_init(); @@ -879,6 +891,8 @@ proc_if6_fail: proc_anycast6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: + udplite6_proc_exit(); +proc_udplite6_fail: udp6_proc_exit(); proc_udp6_fail: tcp6_proc_exit(); @@ -902,6 +916,8 @@ out_unregister_sock: sock_unregister(PF_INET6); out_unregister_raw_proto: proto_unregister(&rawv6_prot); +out_unregister_udplite_proto: + proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: @@ -919,6 +935,7 @@ static void __exit inet6_exit(void) ac6_proc_exit(); ipv6_misc_proc_exit(); udp6_proc_exit(); + udplite6_proc_exit(); tcp6_proc_exit(); raw6_proc_exit(); #endif diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index b0d83e8e425..12c5a4dec09 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -354,10 +354,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC); + tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC); if (!tmp_hdr) goto out; - memcpy(tmp_hdr, skb->nh.raw, hdr_len); if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) goto free_out; skb->nh.ipv6h->priority = 0; @@ -397,7 +396,7 @@ out: } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7206747022f..5c94fea90e9 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -207,7 +207,7 @@ out: } void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - u16 port, u32 info, u8 *payload) + __be16 port, u32 info, u8 *payload) { struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw; @@ -318,13 +318,13 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) ipv6_addr_copy(&sin->sin6_addr, (struct in6_addr *)(skb->nh.raw + serr->addr_offset)); if (np->sndflow) - sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK; + sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK; if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { ipv6_addr_set(&sin->sin6_addr, 0, 0, htonl(0xffff), - *(u32*)(skb->nh.raw + serr->addr_offset)); + *(__be32*)(skb->nh.raw + serr->addr_offset)); } } @@ -397,12 +397,12 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } if (np->rxopt.bits.rxtclass) { - int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff; + int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff; put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } - if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { - u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; + if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { + __be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } @@ -560,12 +560,12 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { - if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { + if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { err = -EINVAL; goto exit_f; } } - fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); + fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); break; case IPV6_2292HOPOPTS: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e78680a9985..25dcf69cd80 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -256,7 +256,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 88c96b10684..0711f92d6a1 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -284,10 +284,12 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) #ifdef CONFIG_IPV6_MIP6 __u16 dstbuf; #endif + struct dst_entry *dst; if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -298,7 +300,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) dstbuf = opt->dst1; #endif + dst = dst_clone(skb->dst); if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + dst_release(dst); skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); opt = IP6CB(skb); @@ -310,7 +314,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) return 1; } - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + dst_release(dst); return -1; } @@ -365,7 +370,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -374,7 +380,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -388,7 +395,8 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -410,7 +418,8 @@ looped_back: switch (hdr->type) { case IPV6_SRCRT_TYPE_0: if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); return -1; } @@ -419,14 +428,16 @@ looped_back: case IPV6_SRCRT_TYPE_2: /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } break; #endif default: - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } @@ -439,7 +450,8 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw); return -1; } @@ -449,12 +461,14 @@ looped_back: */ if (skb_cloned(skb)) { struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - kfree_skb(skb); /* the copy is a forwarded packet */ if (skb2 == NULL) { - IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); return -1; } + kfree_skb(skb); *skbp = skb = skb2; opt = IP6CB(skb2); hdr = (struct ipv6_rt_hdr *) skb2->h.raw; @@ -475,12 +489,14 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(addr)) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -491,7 +507,8 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -510,7 +527,8 @@ looped_back: if (skb->dst->dev->flags&IFF_LOOPBACK) { if (skb->nh.ipv6h->hop_limit <= 1) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); kfree_skb(skb); @@ -632,24 +650,25 @@ static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]); - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); goto drop; } - pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2)); + pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (skb->nh.ipv6h->payload_len) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 315bc1fbec3..21cbbbddaf4 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -77,7 +77,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp) if (hp == NULL) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { - unsigned short _frag_off, *fp; + __be16 _frag_off, *fp; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 1896ecb5289..0862809ffcf 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -25,10 +25,6 @@ struct fib6_rule struct fib_rule common; struct rt6key src; struct rt6key dst; -#ifdef CONFIG_IPV6_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; -#endif u8 tclass; }; @@ -67,7 +63,7 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, fib_rule_put(arg.rule); if (arg.result) - return (struct dst_entry *) arg.result; + return arg.result; dst_hold(&ip6_null_entry.u.dst); return &ip6_null_entry.u.dst; @@ -130,22 +126,13 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) return 0; -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask) - return 0; -#endif - return 1; } static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, + FRA_GENERIC_POLICY, [FRA_SRC] = { .len = sizeof(struct in6_addr) }, [FRA_DST] = { .len = sizeof(struct in6_addr) }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, @@ -155,8 +142,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib6_rule *rule6 = (struct fib6_rule *) rule; - if (frh->src_len > 128 || frh->dst_len > 128 || - (frh->tos & ~IPV6_FLOWINFO_MASK)) + if (frh->src_len > 128 || frh->dst_len > 128) goto errout; if (rule->action == FR_ACT_TO_TBL) { @@ -177,23 +163,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, nla_memcpy(&rule6->dst.addr, tb[FRA_DST], sizeof(struct in6_addr)); -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (rule6->fwmark) { - /* - * if the mark value is non-zero, - * all bits are compared by default - * unless a mask is explicitly specified. - */ - rule6->fwmask = 0xFFFFFFFF; - } - } - - if (tb[FRA_FWMASK]) - rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif - rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; @@ -225,14 +194,6 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; -#endif - return 1; } @@ -254,14 +215,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), &rule6->src.addr); -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (rule6->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); - - if (rule6->fwmask || rule6->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); -#endif - return 0; nla_put_failure: @@ -278,6 +231,12 @@ static u32 fib6_rule_default_pref(void) return 0x3FFF; } +static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) +{ + return nla_total_size(16) /* dst */ + + nla_total_size(16); /* src */ +} + static struct fib_rules_ops fib6_rules_ops = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -287,6 +246,7 @@ static struct fib_rules_ops fib6_rules_ops = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .default_pref = fib6_rule_default_pref, + .nlmsg_payload = fib6_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV6_RULE, .policy = fib6_rule_policy, .rules_list = &fib6_rules, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4ec876066b3..3dcc4b7f41b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -177,7 +177,8 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, */ dst = ip6_route_output(sk, fl); if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(ip6_dst_idev(dst), + IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; } else { @@ -233,7 +234,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct len, fl->proto, skb->csum); } else { - u32 tmp_csum = 0; + __wsum tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); @@ -241,13 +242,11 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_partial((char *)icmp6h, sizeof(struct icmp6hdr), tmp_csum); - tmp_csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->proto, tmp_csum); - icmp6h->icmp6_cksum = tmp_csum; + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, + &fl->fl6_dst, + len, fl->proto, + tmp_csum); } - if (icmp6h->icmp6_cksum == 0) - icmp6h->icmp6_cksum = -1; ip6_push_pending_frames(sk); out: return err; @@ -263,7 +262,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st { struct icmpv6_msg *msg = (struct icmpv6_msg *) from; struct sk_buff *org_skb = msg->skb; - __u32 csum = 0; + __wsum csum = 0; csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, to, len, csum); @@ -555,7 +554,7 @@ out: icmpv6_xmit_unlock(); } -static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) +static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) { struct in6_addr *saddr, *daddr; struct inet6_protocol *ipprot; @@ -637,8 +636,8 @@ static int icmpv6_rcv(struct sk_buff **pskb) break; /* fall through */ case CHECKSUM_NONE: - skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len, - IPPROTO_ICMPV6, 0); + skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", NIP6(*saddr), NIP6(*daddr)); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 827f41d1478..c700302ad51 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -52,20 +52,20 @@ EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); /* * request_sock (formerly open request) hash tables. */ -static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, +static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - u32 a = raddr->s6_addr32[0]; - u32 b = raddr->s6_addr32[1]; - u32 c = raddr->s6_addr32[2]; + u32 a = (__force u32)raddr->s6_addr32[0]; + u32 b = (__force u32)raddr->s6_addr32[1]; + u32 c = (__force u32)raddr->s6_addr32[2]; a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; c += rnd; __jhash_mix(a, b, c); - a += raddr->s6_addr32[3]; - b += (u32)rport; + a += (__force u32)raddr->s6_addr32[3]; + b += (__force u32)rport; __jhash_mix(a, b, c); return c & (synq_hsize - 1); @@ -73,7 +73,7 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, struct request_sock *inet6_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, + const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) @@ -139,9 +139,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); -int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 8accd1fbeed..b7e5bae0e34 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(__inet6_hash); */ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, - const u16 sport, + const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif) @@ -146,8 +146,8 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, const u16 sport, - const struct in6_addr *daddr, const u16 dport, + const struct in6_addr *saddr, const __be16 sport, + const struct in6_addr *daddr, const __be16 dport, const int dif) { struct sock *sk; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f98ca30d7c1..96d8310ae9c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -50,7 +50,7 @@ struct rt6_statistics rt6_stats; -static kmem_cache_t * fib6_node_kmem __read_mostly; +static struct kmem_cache * fib6_node_kmem __read_mostly; enum fib_walk_state_t { @@ -139,9 +139,9 @@ static __inline__ u32 fib6_new_sernum(void) * test bit */ -static __inline__ int addr_bit_set(void *token, int fn_bit) +static __inline__ __be32 addr_bit_set(void *token, int fn_bit) { - __u32 *addr = token; + __be32 *addr = token; return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; } @@ -150,7 +150,7 @@ static __inline__ struct fib6_node * node_alloc(void) { struct fib6_node *fn; - if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL) + if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL) memset(fn, 0, sizeof(struct fib6_node)); return fn; @@ -434,7 +434,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, struct fib6_node *pn = NULL; struct rt6key *key; int bit; - int dir = 0; + __be32 dir = 0; __u32 sernum = fib6_new_sernum(); RT6_TRACE("fib6_add_1\n"); @@ -829,7 +829,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct lookup_args *args) { struct fib6_node *fn; - int dir; + __be32 dir; if (unlikely(args->offset == 0)) return NULL; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1d672b0547f..624fae251f4 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -61,7 +61,7 @@ static DEFINE_RWLOCK(ip6_fl_lock); static DEFINE_RWLOCK(ip6_sk_fl_lock); -static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label) +static __inline__ struct ip6_flowlabel * __fl_lookup(__be32 label) { struct ip6_flowlabel *fl; @@ -72,7 +72,7 @@ static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label) return NULL; } -static struct ip6_flowlabel * fl_lookup(u32 label) +static struct ip6_flowlabel * fl_lookup(__be32 label) { struct ip6_flowlabel *fl; @@ -153,7 +153,7 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static int fl_intern(struct ip6_flowlabel *fl, __u32 label) +static int fl_intern(struct ip6_flowlabel *fl, __be32 label) { fl->label = label & IPV6_FLOWLABEL_MASK; @@ -182,7 +182,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __u32 label) /* Socket flowlabel lists */ -struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label) +struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; struct ipv6_pinfo *np = inet6_sk(sk); @@ -330,8 +330,10 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * fl->share = freq->flr_share; addr_type = ipv6_addr_type(&freq->flr_dst); if ((addr_type&IPV6_ADDR_MAPPED) - || addr_type == IPV6_ADDR_ANY) + || addr_type == IPV6_ADDR_ANY) { + err = -EINVAL; goto done; + } ipv6_addr_copy(&fl->dst, &freq->flr_dst); atomic_set(&fl->users, 1); switch (fl->share) { @@ -587,6 +589,8 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo while (!fl) { if (++state->bucket <= FL_HASH_MASK) fl = fl_ht[state->bucket]; + else + break; } return fl; } @@ -623,9 +627,13 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) read_unlock_bh(&ip6_fl_lock); } -static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl) +static int ip6fl_seq_show(struct seq_file *seq, void *v) { - while(fl) { + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", + "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); + else { + struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n", (unsigned)ntohl(fl->label), @@ -636,17 +644,7 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl) (long)(fl->expires - jiffies)/HZ, NIP6(fl->dst), fl->opt ? fl->opt->opt_nflen : 0); - fl = fl->next; } -} - -static int ip6fl_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", - "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); - else - ip6fl_fl_seq_show(seq, v); return 0; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6b8e6d76a58..ad0b8abcdf4 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -60,14 +60,22 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt { struct ipv6hdr *hdr; u32 pkt_len; + struct inet6_dev *idev; - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) { + kfree_skb(skb); + return 0; + } + + rcu_read_lock(); - IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); + idev = __in6_dev_get(skb->dev); + + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + rcu_read_unlock(); goto out; } @@ -84,7 +92,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji */ - IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex; + IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; @@ -104,7 +112,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (pkt_len + sizeof(struct ipv6hdr) > skb->len) goto truncated; if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = skb->nh.ipv6h; @@ -112,17 +120,21 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(&skb) < 0) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + rcu_read_unlock(); return 0; } } + rcu_read_unlock(); + return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); truncated: - IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); err: - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); drop: + rcu_read_unlock(); kfree_skb(skb); out: return 0; @@ -140,6 +152,7 @@ static inline int ip6_input_finish(struct sk_buff *skb) unsigned int nhoff; int nexthdr; u8 hash; + struct inet6_dev *idev; /* * Parse extension headers @@ -147,6 +160,7 @@ static inline int ip6_input_finish(struct sk_buff *skb) rcu_read_lock(); resubmit: + idev = ip6_dst_idev(skb->dst); if (!pskb_pull(skb, skb->h.raw - skb->data)) goto discard; nhoff = IP6CB(skb)->nhoff; @@ -185,24 +199,24 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev); } } else - IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } rcu_read_unlock(); return 0; discard: - IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -219,7 +233,7 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); hdr = skb->nh.ipv6h; deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) || diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 66716911962..7b7bd44fbf4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -72,23 +72,14 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f static inline int ip6_output_finish(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct hh_cache *hh = dst->hh; - - if (hh) { - int hh_alen; - - read_lock_bh(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(hh->hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); - read_unlock_bh(&hh->hh_lock); - skb_push(skb, hh->hh_len); - return hh->hh_output(skb); - } else if (dst->neighbour) + + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) return dst->neighbour->output(skb); - IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; @@ -118,6 +109,7 @@ static int ip6_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) { struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr, @@ -133,13 +125,13 @@ static int ip6_output2(struct sk_buff *skb) ip6_dev_loopback_xmit); if (skb->nh.ipv6h->hop_limit == 0) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); @@ -182,12 +174,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - kfree_skb(skb); - skb = skb2; - if (skb == NULL) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + if (skb2 == NULL) { + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); return -ENOBUFS; } + kfree_skb(skb); + skb = skb2; if (sk) skb_set_owner_w(skb, sk); } @@ -217,7 +211,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (tclass < 0) tclass = 0; - *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -230,7 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -239,7 +234,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -267,7 +262,7 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); skb->nh.ipv6h = hdr; - *(u32*)hdr = htonl(0x60000000); + *(__be32*)hdr = htonl(0x60000000); hdr->payload_len = htons(len); hdr->nexthdr = proto; @@ -373,7 +368,7 @@ int ip6_forward(struct sk_buff *skb) goto error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -406,7 +401,7 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -419,13 +414,13 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb->dst; @@ -464,14 +459,14 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -481,11 +476,11 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -499,12 +494,12 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; + to->mark = from->mark; #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif #ifdef CONFIG_NETFILTER - to->nfmark = from->nfmark; /* Connection association is same as pre-frag packet */ nf_conntrack_put(to->nfct); to->nfct = from->nfct; @@ -571,7 +566,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; - u32 frag_id = 0; + __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; @@ -620,14 +615,13 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_shinfo(skb)->frag_list = NULL; /* BUILD HEADER */ - tmp_hdr = kmalloc(hlen, GFP_ATOMIC); + *prevhdr = NEXTHDR_FRAGMENT; + tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } - *prevhdr = NEXTHDR_FRAGMENT; - memcpy(tmp_hdr, skb->nh.raw, hlen); __skb_pull(skb, hlen); fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); skb->nh.raw = __skb_push(skb, hlen); @@ -643,7 +637,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - + + dst_hold(&rt->u.dst); for (;;) { /* Prepare header of the next frame, @@ -667,7 +662,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -680,7 +675,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); + dst_release(&rt->u.dst); return 0; } @@ -690,7 +686,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); + dst_release(&rt->u.dst); return err; } @@ -723,7 +720,8 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -784,15 +782,17 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); } + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGOKS); kfree_skb(skb); - IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); return err; fail: + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); - IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); return err; } @@ -1265,7 +1265,7 @@ alloc_new_skb: return 0; error: inet->cork.length -= length; - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1311,7 +1311,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - *(u32*)hdr = fl->fl6_flowlabel | + *(__be32*)hdr = fl->fl6_flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) @@ -1326,7 +1326,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) @@ -1357,7 +1357,8 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 84d7ebdb9d2..8d918348f5b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -66,7 +66,7 @@ MODULE_LICENSE("GPL"); #define HASH_SIZE 32 -#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ +#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) @@ -215,11 +215,10 @@ ip6ip6_tnl_unlink(struct ip6_tnl *t) * Create tunnel matching given parameters. * * Return: - * 0 on success + * created tunnel or NULL **/ -static int -ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) +static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; @@ -236,11 +235,11 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) break; } if (i == IP6_TNL_MAX) - return -ENOBUFS; + goto failed; } dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup); if (dev == NULL) - return -ENOMEM; + goto failed; t = netdev_priv(dev); dev->init = ip6ip6_tnl_dev_init; @@ -248,13 +247,13 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) if ((err = register_netdevice(dev)) < 0) { free_netdev(dev); - return err; + goto failed; } dev_hold(dev); - ip6ip6_tnl_link(t); - *pt = t; - return 0; + return t; +failed: + return NULL; } /** @@ -268,32 +267,23 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) * tunnel device is created and registered for use. * * Return: - * 0 if tunnel located or created, - * -EINVAL if parameters incorrect, - * -ENODEV if no matching tunnel available + * matching tunnel or NULL **/ -static int -ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create) +static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create) { struct in6_addr *remote = &p->raddr; struct in6_addr *local = &p->laddr; struct ip6_tnl *t; - if (p->proto != IPPROTO_IPV6) - return -EINVAL; - for (t = *ip6ip6_bucket(p); t; t = t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) { - *pt = t; - return (create ? -EEXIST : 0); - } + ipv6_addr_equal(remote, &t->parms.raddr)) + return t; } if (!create) - return -ENODEV; - - return ip6_tnl_create(p, pt); + return NULL; + return ip6_tnl_create(p); } /** @@ -391,7 +381,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; struct ip6_tnl *t; @@ -434,12 +424,9 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PARAMPROB: - /* ignore if parameter problem not caused by a tunnel - encapsulation limit sub-option */ - if (code != ICMPV6_HDR_FIELD) { - break; - } - teli = parse_tlv_tnl_enc_lim(skb, skb->data); + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = parse_tlv_tnl_enc_lim(skb, skb->data); if (teli && teli == ntohl(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; @@ -451,6 +438,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, "tunnel!\n", t->parms.name); rel_msg = 1; } + } else if (net_ratelimit()) { + printk(KERN_WARNING + "%s: Recipient unable to parse tunneled " + "packet!\n ", t->parms.name); } break; case ICMPV6_PKT_TOOBIG: @@ -470,6 +461,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) { struct rt6_info *rt; struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) goto out; @@ -504,6 +496,27 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) IP6_ECN_set_ce(inner_iph); } +static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) +{ + struct ip6_tnl_parm *p = &t->parms; + int ret = 0; + + if (p->flags & IP6_TNL_F_CAP_RCV) { + struct net_device *ldev = NULL; + + if (p->link) + ldev = dev_get_by_index(p->link); + + if ((ipv6_addr_is_multicast(&p->laddr) || + likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && + likely(!ipv6_chk_addr(&p->raddr, NULL, 0))) + ret = 1; + + if (ldev) + dev_put(ldev); + } + return ret; +} /** * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally @@ -528,7 +541,7 @@ ip6ip6_rcv(struct sk_buff *skb) goto discard; } - if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) { + if (!ip6_tnl_rcv_ctl(t)) { t->stat.rx_dropped++; read_unlock(&ip6ip6_lock); goto discard; @@ -542,6 +555,7 @@ ip6ip6_rcv(struct sk_buff *skb) skb->dev = t->dev; dst_release(skb->dst); skb->dst = NULL; + nf_reset(skb); if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv6_copy_dscp(ipv6h, skb->nh.ipv6h); ip6ip6_ecn_decapsulate(ipv6h, skb); @@ -559,31 +573,23 @@ discard: return 0; } -static inline struct ipv6_txoptions *create_tel(__u8 encap_limit) -{ - struct ipv6_tlv_tnl_enc_lim *tel; - struct ipv6_txoptions *opt; - __u8 *raw; - - int opt_len = sizeof(*opt) + 8; - - if (!(opt = kzalloc(opt_len, GFP_ATOMIC))) { - return NULL; - } - opt->tot_len = opt_len; - opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1); - opt->opt_nflen = 8; +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; - tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1); - tel->type = IPV6_TLV_TNL_ENCAP_LIMIT; - tel->length = 1; - tel->encap_limit = encap_limit; +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); - raw = (__u8 *) opt->dst0opt; - raw[5] = IPV6_TLV_PADN; - raw[6] = 1; + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; - return opt; + opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; } /** @@ -606,6 +612,34 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } +static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +{ + struct ip6_tnl_parm *p = &t->parms; + int ret = 0; + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + struct net_device *ldev = NULL; + + if (p->link) + ldev = dev_get_by_index(p->link); + + if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) + printk(KERN_WARNING + "%s xmit: Local address not yet configured!\n", + p->name); + else if (!ipv6_addr_is_multicast(&p->raddr) && + unlikely(ipv6_chk_addr(&p->raddr, NULL, 0))) + printk(KERN_WARNING + "%s xmit: Routing loop! " + "Remote address found on this node!\n", + p->name); + else + ret = 1; + if (ldev) + dev_put(ldev); + } + return ret; +} /** * ip6ip6_tnl_xmit - encapsulate packet and send * @skb: the outgoing socket buffer @@ -625,8 +659,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->stat; struct ipv6hdr *ipv6h = skb->nh.ipv6h; - struct ipv6_txoptions *opt = NULL; int encap_limit = -1; + struct ipv6_tel_txoption opt; __u16 offset; struct flowi fl; struct dst_entry *dst; @@ -643,10 +677,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } if (skb->protocol != htons(ETH_P_IPV6) || - !(t->parms.flags & IP6_TNL_F_CAP_XMIT) || - ip6ip6_tnl_addr_conflict(t, ipv6h)) { + !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h)) goto tx_err; - } + if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset]; @@ -656,20 +689,17 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - } + memcpy(&fl, &t->fl, sizeof (fl)); proto = fl.proto; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK); + fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK); - - if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL) - goto tx_err; + fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); @@ -691,7 +721,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err_dst_release; } mtu = dst_mtu(dst) - sizeof (*ipv6h); - if (opt) { + if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; } @@ -729,12 +759,13 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; - if (opt) - ipv6_push_nfrag_opts(skb, opt, &proto, NULL); - + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + } skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr)); ipv6h = skb->nh.ipv6h; - *(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000); + *(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); @@ -747,7 +778,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); - if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) { + if (net_xmit_eval(err) == 0) { stats->tx_bytes += pkt_len; stats->tx_packets++; } else { @@ -755,9 +786,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_aborted_errors++; } ip6_tnl_dst_store(t, dst); - - kfree(opt); - t->recursion--; return 0; tx_err_link_failure: @@ -765,7 +793,6 @@ tx_err_link_failure: dst_link_failure(skb); tx_err_dst_release: dst_release(dst); - kfree(opt); tx_err: stats->tx_errors++; stats->tx_dropped++; @@ -777,39 +804,19 @@ tx_err: static void ip6_tnl_set_cap(struct ip6_tnl *t) { struct ip6_tnl_parm *p = &t->parms; - struct in6_addr *laddr = &p->laddr; - struct in6_addr *raddr = &p->raddr; - int ltype = ipv6_addr_type(laddr); - int rtype = ipv6_addr_type(raddr); + int ltype = ipv6_addr_type(&p->laddr); + int rtype = ipv6_addr_type(&p->raddr); p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); - if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY && - ((ltype|rtype) & - (IPV6_ADDR_UNICAST| - IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL| - IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) { - struct net_device *ldev = NULL; - int l_ok = 1; - int r_ok = 1; - - if (p->link) - ldev = dev_get_by_index(p->link); - - if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0)) - l_ok = 0; - - if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0)) - r_ok = 0; - - if (l_ok && r_ok) { - if (ltype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_XMIT; - if (rtype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_RCV; - } - if (ldev) - dev_put(ldev); + if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + p->flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + p->flags |= IP6_TNL_F_CAP_RCV; } } @@ -843,8 +850,11 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t) dev->iflink = p->link; if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr, - p->link, 0); + p->link, strict); if (rt == NULL) return; @@ -919,26 +929,20 @@ static int ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; - int create; struct ip6_tnl_parm p; struct ip6_tnl *t = NULL; switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6ip6_fb_tnl_dev) { - if (copy_from_user(&p, - ifr->ifr_ifru.ifru_data, - sizeof (p))) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { err = -EFAULT; break; } - if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV) - t = netdev_priv(dev); - else if (err) - break; - } else + t = ip6ip6_tnl_locate(&p, 0); + } + if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof (p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; @@ -947,35 +951,36 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; - create = (cmd == SIOCADDTUNNEL); if (!capable(CAP_NET_ADMIN)) break; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { - err = -EFAULT; + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; - } - if (!create && dev != ip6ip6_fb_tnl_dev) { - t = netdev_priv(dev); - } - if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) { + err = -EINVAL; + if (p.proto != IPPROTO_IPV6) break; - } - if (cmd == SIOCCHGTUNNEL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } + t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL); + if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else + t = netdev_priv(dev); + ip6ip6_tnl_unlink(t); err = ip6ip6_tnl_change(t, &p); ip6ip6_tnl_link(t); netdev_state_change(dev); } - if (copy_to_user(ifr->ifr_ifru.ifru_data, - &t->parms, sizeof (p))) { - err = -EFAULT; - } else { + if (t) { err = 0; - } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) + err = -EFAULT; + + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); break; case SIOCDELTUNNEL: err = -EPERM; @@ -983,22 +988,18 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; if (dev == ip6ip6_fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, - sizeof (p))) { - err = -EFAULT; + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; - } - err = ip6ip6_tnl_locate(&p, &t, 0); - if (err) + err = -ENOENT; + if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL) break; - if (t == netdev_priv(ip6ip6_fb_tnl_dev)) { - err = -EPERM; + err = -EPERM; + if (t->dev == ip6ip6_fb_tnl_dev) break; - } - } else { - t = netdev_priv(dev); + dev = t->dev; } - err = unregister_netdevice(t->dev); + err = unregister_netdevice(dev); break; default: err = -EINVAL; @@ -1149,6 +1150,20 @@ fail: return err; } +static void __exit ip6ip6_destroy_tunnels(void) +{ + int h; + struct ip6_tnl *t; + + for (h = 0; h < HASH_SIZE; h++) { + while ((t = tnls_r_l[h]) != NULL) + unregister_netdevice(t->dev); + } + + t = tnls_wc[0]; + unregister_netdevice(t->dev); +} + /** * ip6_tunnel_cleanup - free resources and unregister protocol **/ @@ -1158,7 +1173,9 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler)) printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n"); - unregister_netdev(ip6ip6_fb_tnl_dev); + rtnl_lock(); + ip6ip6_destroy_tunnels(); + rtnl_unlock(); } module_init(ip6_tunnel_init); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 71f59f18ede..511730b67e9 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -176,7 +176,7 @@ out_ok: } static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index de6b91981b3..352690e2ab8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -51,6 +51,7 @@ #include <net/inet_common.h> #include <net/tcp.h> #include <net/udp.h> +#include <net/udplite.h> #include <net/xfrm.h> #include <asm/uaccess.h> @@ -239,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && + sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) break; @@ -276,11 +278,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct proto *prot = &udp_prot; + + if (sk->sk_protocol == IPPROTO_UDPLITE) + prot = &udplite_prot; local_bh_disable(); sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(&udp_prot); + sock_prot_inc_use(prot); local_bh_enable(); - sk->sk_prot = &udp_prot; + sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } @@ -813,6 +819,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && + sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) @@ -971,12 +978,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_UNICAST_HOPS: - val = np->hop_limit; - break; - case IPV6_MULTICAST_HOPS: - val = np->mcast_hops; + { + struct dst_entry *dst; + + if (optname == IPV6_UNICAST_HOPS) + val = np->hop_limit; + else + val = np->mcast_hops; + + dst = sk_dst_get(sk); + if (dst) { + if (val < 0) + val = dst_metric(dst, RTAX_HOPLIMIT); + if (val < 0) + val = ipv6_get_hoplimit(dst->dev); + dst_release(dst); + } + if (val < 0) + val = ipv6_devconf.hop_limit; break; + } case IPV6_MULTICAST_LOOP: val = np->mc_loop; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3b114e3fa2f..a1c231a04ac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -83,7 +83,7 @@ struct mld2_grec { __u8 grec_type; __u8 grec_auxwords; - __u16 grec_nsrcs; + __be16 grec_nsrcs; struct in6_addr grec_mca; struct in6_addr grec_src[0]; }; @@ -91,18 +91,18 @@ struct mld2_grec { struct mld2_report { __u8 type; __u8 resv1; - __u16 csum; - __u16 resv2; - __u16 ngrec; + __sum16 csum; + __be16 resv2; + __be16 ngrec; struct mld2_grec grec[0]; }; struct mld2_query { __u8 type; __u8 code; - __u16 csum; - __u16 mrc; - __u16 resv1; + __sum16 csum; + __be16 mrc; + __be16 resv1; struct in6_addr mca; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 qrv:3, @@ -116,7 +116,7 @@ struct mld2_query { #error "Please fix <asm/byteorder.h>" #endif __u8 qqic; - __u16 nsrcs; + __be16 nsrcs; struct in6_addr srcs[0]; }; @@ -1465,7 +1465,7 @@ static void mld_sendpack(struct sk_buff *skb) struct inet6_dev *idev = in6_dev_get(skb->dev); int err; - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h - sizeof(struct ipv6hdr); mldlen = skb->tail - skb->h.raw; @@ -1477,9 +1477,9 @@ static void mld_sendpack(struct sk_buff *skb) mld_dev_queue_xmit); if (!err) { ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); @@ -1763,7 +1763,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + rcu_read_lock(); + IP6_INC_STATS(__in6_dev_get(dev), + IPSTATS_MIB_OUTREQUESTS); + rcu_read_unlock(); snd_addr = addr; if (type == ICMPV6_MGM_REDUCTION) { snd_addr = &all_routers; @@ -1777,7 +1780,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + rcu_read_lock(); + IP6_INC_STATS(__in6_dev_get(dev), + IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return; } @@ -1816,9 +1822,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) else ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 7ccdc8fc5a3..be7dd7db65d 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -262,10 +262,10 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sel.proto = fl->proto; sel.dport = xfrm_flowi_dport(fl); if (sel.dport) - sel.dport_mask = ~((__u16)0); + sel.dport_mask = htons(~0); sel.sport = xfrm_flowi_sport(fl); if (sel.sport) - sel.sport_mask = ~((__u16)0); + sel.sport_mask = htons(~0); sel.ifindex = fl->oif; err = km_report(IPPROTO_DSTOPTS, &sel, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 41a8a5f0660..6a9f616de37 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -472,7 +472,9 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, inc_opt = 0; } - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, + (MAX_HEADER + sizeof(struct ipv6hdr) + + len + LL_RESERVED_SPACE(dev)), 1, &err); if (skb == NULL) { @@ -513,7 +515,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS); @@ -561,7 +563,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, if (send_llinfo) len += ndisc_opt_addr_space(dev); - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, + (MAX_HEADER + sizeof(struct ipv6hdr) + + len + LL_RESERVED_SPACE(dev)), 1, &err); if (skb == NULL) { ND_PRINTK0(KERN_ERR @@ -597,7 +601,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, /* send it! */ skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS); @@ -636,7 +640,9 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, if (dev->addr_len) len += ndisc_opt_addr_space(dev); - skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, + (MAX_HEADER + sizeof(struct ipv6hdr) + + len + LL_RESERVED_SPACE(dev)), 1, &err); if (skb == NULL) { ND_PRINTK0(KERN_ERR @@ -670,7 +676,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, /* send it! */ skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS); @@ -1261,10 +1267,11 @@ skip_defrtr: } if (ndopts.nd_opts_mtu) { + __be32 n; u32 mtu; - memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); - mtu = ntohl(mtu); + memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); + mtu = ntohl(n); if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK2(KERN_WARNING @@ -1446,7 +1453,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, rd_len &= ~0x7; len += rd_len; - buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev), + buff = sock_alloc_send_skb(sk, + (MAX_HEADER + sizeof(struct ipv6hdr) + + len + LL_RESERVED_SPACE(dev)), 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR @@ -1504,7 +1513,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS); @@ -1658,8 +1667,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1672,14 +1680,12 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; default: ret = 0; @@ -1742,6 +1748,7 @@ int __init ndisc_init(struct net_proto_family *ops) void ndisc_cleanup(void) { + unregister_netdevice_notifier(&ndisc_netdev_notifier); #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 580b1aba672..f6294e5bcb3 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -31,7 +31,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #endif if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; @@ -80,11 +80,11 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info) return 0; } -unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) { struct ipv6hdr *ip6h = skb->nh.ipv6h; - unsigned int csum = 0; + __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -100,12 +100,13 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, } /* fall through */ case CHECKSUM_NONE: - skb->csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + skb->csum = ~csum_unfold( + csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, csum_sub(0, skb_checksum(skb, 0, - dataoff, 0))); + dataoff, 0)))); csum = __skb_checksum_complete(skb); } return csum; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4bc4e5b3379..fc3e5eb4bc3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -6,7 +6,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" depends on INET && IPV6 && NETFILTER && EXPERIMENTAL config NF_CONNTRACK_IPV6 - tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" + tristate "IPv6 connection tracking support (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed @@ -40,7 +40,7 @@ config IP6_NF_QUEUE To compile it as a module, choose M here. If unsure, say N. config IP6_NF_IPTABLES - tristate "IP6 tables support (required for filtering/masq/NAT)" + tristate "IP6 tables support (required for filtering)" depends on NETFILTER_XTABLES help ip6tables is a general, extensible packet identification framework. diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 9510c24ca8d..d4d9f182441 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -241,7 +241,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->data_len = data_len; pmsg->timestamp_sec = entry->skb->tstamp.off_sec; pmsg->timestamp_usec = entry->skb->tstamp.off_usec; - pmsg->mark = entry->skb->nfmark; + pmsg->mark = entry->skb->mark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; @@ -349,9 +349,10 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len < sizeof(*user_iph)) return 0; diff = v->data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, v->data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, v->data_len)) + return -ENOMEM; + } else if (diff > 0) { if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { @@ -619,6 +620,7 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; +#ifdef CONFIG_PROC_FS static int ipq_get_info(char *buffer, char **start, off_t offset, int length) { @@ -652,6 +654,7 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) len = 0; return len; } +#endif /* CONFIG_PROC_FS */ static struct nf_queue_handler nfqh = { .name = "ip6_queue", diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 53bf977cca6..99502c5da4c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -413,6 +413,7 @@ mark_source_chains(struct xt_table_info *newinfo, unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); + int visited = e->comefrom & (1 << hook); if (!(valid_hooks & (1 << hook))) continue; @@ -433,13 +434,20 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ip6t_entry) + if ((e->target_offset == sizeof(struct ip6t_entry) && (strcmp(t->target.u.user.name, IP6T_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ipv6)) { + && unconditional(&e->ipv6)) || visited) { unsigned int oldpos, size; + if (t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last big jump. */ do { @@ -477,6 +485,13 @@ mark_source_chains(struct xt_table_info *newinfo, if (strcmp(t->target.u.user.name, IP6T_STANDARD_TARGET) == 0 && newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ip6t_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -509,27 +524,6 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) } static inline int -standard_check(const struct ip6t_entry_target *t, - unsigned int max_offset) -{ - struct ip6t_standard_target *targ = (void *)t; - - /* Check standard info. */ - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct ip6t_entry)) { - duprintf("ip6t_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("ip6t_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } - return 1; -} - -static inline int check_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, @@ -586,12 +580,19 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return -EINVAL; } + if (e->target_offset + sizeof(struct ip6t_entry_target) > + e->next_offset) + return -EINVAL; + j = 0; ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j); if (ret != 0) goto cleanup_matches; t = ip6t_get_target(e); + ret = -EINVAL; + if (e->target_offset + t->u.target_size > e->next_offset) + goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET6, t->u.user.name, t->u.user.revision), @@ -609,12 +610,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, if (ret) goto err; - if (t->u.kernel.target == &ip6t_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto err; - } - } else if (t->u.kernel.target->checkentry + if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", @@ -761,7 +757,7 @@ translate_table(const char *name, if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, &i); return ret; } @@ -771,7 +767,7 @@ translate_table(const char *name, memcpy(newinfo->entries[i], entry0, newinfo->size); } - return ret; + return 0; } /* Gets counters. */ @@ -1473,7 +1469,8 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (hp == NULL) return -EBADMSG; if (nexthdr == NEXTHDR_FRAGMENT) { - unsigned short _frag_off, *fp; + unsigned short _frag_off; + __be16 *fp; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), @@ -1486,7 +1483,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (_frag_off) { if (target < 0 && ((!ipv6_ext_hdr(hp->nexthdr)) || - nexthdr == NEXTHDR_NONE)) { + hp->nexthdr == NEXTHDR_NONE)) { if (fragoff) *fragoff = _frag_off; return hp->nexthdr; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cf537d3018..33b1faa90d7 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -69,9 +69,9 @@ static void dump_packet(const struct nf_loginfo *info, /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20, + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, ih->hop_limit, - (ntohl(*(u_int32_t *)ih) & 0x000fffff)); + (ntohl(*(__be32 *)ih) & 0x000fffff)); fragment = 0; ptr = ip6hoff + sizeof(struct ipv6hdr); @@ -270,11 +270,15 @@ static void dump_packet(const struct nf_loginfo *info, } break; } - case IPPROTO_UDP: { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph, *uh; - /* Max length: 10 "PROTO=UDP " */ - printk("PROTO=UDP "); + if (currenthdr == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + printk("PROTO=UDP " ); + else /* Max length: 14 "PROTO=UDPLITE " */ + printk("PROTO=UDPLITE "); if (fragment) break; @@ -436,13 +440,8 @@ ip6t_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - if (loginfo->logflags & IP6T_LOG_NFLOG) - nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, - "%s", loginfo->prefix); - else - ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, - loginfo->prefix); - + ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, + loginfo->prefix); return IP6T_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 386ea260e76..6250e86a6dd 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -149,11 +149,10 @@ ip6t_local_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { - unsigned long nfmark; unsigned int ret; struct in6_addr saddr, daddr; u_int8_t hop_limit; - u_int32_t flowlabel; + u_int32_t flowlabel, mark; #if 0 /* root is playing with raw sockets. */ @@ -165,10 +164,10 @@ ip6t_local_hook(unsigned int hook, } #endif - /* save source/dest address, nfmark, hoplimit, flowlabel, priority, */ + /* save source/dest address, mark, hoplimit, flowlabel, priority, */ memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr)); memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr)); - nfmark = (*pskb)->nfmark; + mark = (*pskb)->mark; hop_limit = (*pskb)->nh.ipv6h->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either */ @@ -179,7 +178,7 @@ ip6t_local_hook(unsigned int hook, if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) - || (*pskb)->nfmark != nfmark + || (*pskb)->mark != mark || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e5e53fff9e3..a20615ffccf 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -33,7 +33,7 @@ #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_core.h> @@ -43,8 +43,6 @@ #define DEBUGP(format, args...) #endif -DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); - static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -211,11 +209,6 @@ out: return nf_conntrack_confirm(pskb); } -extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); -extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, - struct net_device *out, - int (*okfn)(struct sk_buff *)); static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -331,26 +324,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { }; #ifdef CONFIG_SYSCTL - -/* From nf_conntrack_proto_icmpv6.c */ -extern unsigned int nf_ct_icmpv6_timeout; - -/* From nf_conntrack_reasm.c */ -extern unsigned int nf_ct_frag6_timeout; -extern unsigned int nf_ct_frag6_low_thresh; -extern unsigned int nf_ct_frag6_high_thresh; - -static struct ctl_table_header *nf_ct_ipv6_sysctl_header; - -static ctl_table nf_ct_sysctl_table[] = { - { - .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, - .procname = "nf_conntrack_icmpv6_timeout", - .data = &nf_ct_icmpv6_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, +static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, .procname = "nf_conntrack_frag6_timeout", @@ -377,26 +351,6 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .ctl_name = 0 } }; - -static ctl_table nf_ct_netfilter_table[] = { - { - .ctl_name = NET_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = nf_ct_sysctl_table, - }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_net_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = nf_ct_netfilter_table, - }, - { .ctl_name = 0 } -}; #endif #if defined(CONFIG_NF_CT_NETLINK) || \ @@ -454,16 +408,14 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .tuple_to_nfattr = ipv6_tuple_to_nfattr, .nfattr_to_tuple = ipv6_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_path = nf_net_netfilter_sysctl_path, + .ctl_table = nf_ct_ipv6_sysctl_table, +#endif .get_features = ipv6_get_features, .me = THIS_MODULE, }; -extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; -extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; -extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6; -extern int nf_ct_frag6_init(void); -extern void nf_ct_frag6_cleanup(void); - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); @@ -479,19 +431,19 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) printk("nf_conntrack_ipv6: can't initialize frag6.\n"); return ret; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); if (ret < 0) { printk("nf_conntrack_ipv6: can't register tcp.\n"); goto cleanup_frag6; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); if (ret < 0) { printk("nf_conntrack_ipv6: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); if (ret < 0) { printk("nf_conntrack_ipv6: can't register icmpv6.\n"); goto cleanup_udp; @@ -510,28 +462,16 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) "hook.\n"); goto cleanup_ipv6; } -#ifdef CONFIG_SYSCTL - nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); - if (nf_ct_ipv6_sysctl_header == NULL) { - printk("nf_conntrack: can't register to sysctl.\n"); - ret = -ENOMEM; - goto cleanup_hooks; - } -#endif return ret; -#ifdef CONFIG_SYSCTL - cleanup_hooks: - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); -#endif cleanup_ipv6: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); cleanup_icmpv6: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); cleanup_udp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); cleanup_tcp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); cleanup_frag6: nf_ct_frag6_cleanup(); return ret; @@ -540,14 +480,11 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(nf_ct_ipv6_sysctl_header); -#endif nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); nf_ct_frag6_cleanup(); } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 34d447208ff..3905cacc69a 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -29,11 +29,11 @@ #include <linux/seq_file.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack_tuple.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> -unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk @@ -142,9 +142,6 @@ static int icmpv6_new(struct nf_conn *conntrack, return 1; } -extern int -nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len); -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; static int icmpv6_error_message(struct sk_buff *skb, unsigned int icmp6off, @@ -155,7 +152,7 @@ icmpv6_error_message(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h; struct icmp6hdr _hdr, *hp; unsigned int inip6off; - struct nf_conntrack_protocol *inproto; + struct nf_conntrack_l4proto *inproto; u_int8_t inprotonum; unsigned int inprotoff; @@ -185,7 +182,7 @@ icmpv6_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - inproto = __nf_ct_proto_find(PF_INET6, inprotonum); + inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, @@ -290,7 +287,7 @@ static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], tuple->dst.u.icmp.code = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) @@ -301,10 +298,27 @@ static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], } #endif -struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *icmpv6_sysctl_header; +static struct ctl_table icmpv6_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, + .procname = "nf_conntrack_icmpv6_timeout", + .data = &nf_ct_icmpv6_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_SYSCTL */ + +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l3proto = PF_INET6, - .proto = IPPROTO_ICMPV6, + .l4proto = IPPROTO_ICMPV6, .name = "icmpv6", .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, @@ -318,6 +332,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = .tuple_to_nfattr = icmpv6_tuple_to_nfattr, .nfattr_to_tuple = icmpv6_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_header = &icmpv6_sysctl_header, + .ctl_table = icmpv6_sysctl_table, +#endif }; -EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); +EXPORT_SYMBOL(nf_conntrack_l4proto_icmpv6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bf93c1ea6be..37e5fca923a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -72,7 +72,7 @@ struct nf_ct_frag6_queue struct hlist_node list; struct list_head lru_list; /* lru list member */ - __u32 id; /* fragment id */ + __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; @@ -115,28 +115,28 @@ static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) write_unlock(&nf_ct_frag6_lock); } -static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, +static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) { u32 a, b, c; - a = saddr->s6_addr32[0]; - b = saddr->s6_addr32[1]; - c = saddr->s6_addr32[2]; + a = (__force u32)saddr->s6_addr32[0]; + b = (__force u32)saddr->s6_addr32[1]; + c = (__force u32)saddr->s6_addr32[2]; a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; c += nf_ct_frag6_hash_rnd; __jhash_mix(a, b, c); - a += saddr->s6_addr32[3]; - b += daddr->s6_addr32[0]; - c += daddr->s6_addr32[1]; + a += (__force u32)saddr->s6_addr32[3]; + b += (__force u32)daddr->s6_addr32[0]; + c += (__force u32)daddr->s6_addr32[1]; __jhash_mix(a, b, c); - a += daddr->s6_addr32[2]; - b += daddr->s6_addr32[3]; - c += id; + a += (__force u32)daddr->s6_addr32[2]; + b += (__force u32)daddr->s6_addr32[3]; + c += (__force u32)id; __jhash_mix(a, b, c); return c & (FRAG6Q_HASHSZ - 1); @@ -338,7 +338,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) { struct nf_ct_frag6_queue *fq; @@ -366,7 +366,7 @@ oom: } static __inline__ struct nf_ct_frag6_queue * -fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { struct nf_ct_frag6_queue *fq; struct hlist_node *n; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index efee7a6301a..35249d8487b 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -49,6 +49,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) fold_prot_inuse(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", fold_prot_inuse(&udpv6_prot)); + seq_printf(seq, "UDPLITE6: inuse %d\n", + fold_prot_inuse(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -133,6 +135,14 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; +static struct snmp_mib snmp6_udplite6_list[] = { + SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), + SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), + SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), + SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_SENTINEL +}; + static unsigned long fold_field(void *mib[], int offt) { @@ -161,11 +171,13 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) if (idev) { seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); + snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); } else { snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); + snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); } return 0; } @@ -281,6 +293,9 @@ int snmp6_alloc_dev(struct inet6_dev *idev) if (!idev || !idev->dev) return -EINVAL; + if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) + goto err_ip; if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; @@ -288,12 +303,15 @@ int snmp6_alloc_dev(struct inet6_dev *idev) return 0; err_icmp: + snmp6_mib_free((void **)idev->stats.ipv6); +err_ip: return err; } int snmp6_free_dev(struct inet6_dev *idev) { snmp6_mib_free((void **)idev->stats.icmpv6); + snmp6_mib_free((void **)idev->stats.ipv6); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d09329ca326..4ae1b19ada5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -220,7 +220,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; - __u32 v4addr = 0; + __be32 v4addr = 0; int addr_type; int err; @@ -290,7 +290,7 @@ out: void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, u32 info) + int type, int code, int offset, __be32 info) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -370,9 +370,9 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, - skb->len, inet->num, 0); + skb->len, inet->num, 0)); if (inet->hdrincl) { if (skb_checksum_complete(skb)) { @@ -479,8 +479,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, int offset; int len; int total_len; - u32 tmp_csum; - u16 csum; + __wsum tmp_csum; + __sum16 csum; if (!rp->checksum) goto send; @@ -530,16 +530,15 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, /* in case cksum was not initialized */ if (unlikely(csum)) - tmp_csum = csum_sub(tmp_csum, csum); + tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); - tmp_csum = csum_ipv6_magic(&fl->fl6_src, + csum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, total_len, fl->proto, tmp_csum); - if (tmp_csum == 0) - tmp_csum = -1; + if (csum == 0 && fl->proto == IPPROTO_UDP) + csum = CSUM_MANGLED_0; - csum = tmp_csum; if (skb_store_bits(skb, offset, &csum, 2)) BUG(); @@ -586,7 +585,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (err) goto error_fault; - IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -600,11 +599,11 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } -static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -616,7 +615,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) int i; if (!msg->msg_iov) - return; + return 0; for (i = 0; i < msg->msg_iovlen; i++) { iov = &msg->msg_iov[i]; @@ -638,8 +637,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - get_user(fl->fl_icmp_type, type); - get_user(fl->fl_icmp_code, code); + if (get_user(fl->fl_icmp_type, type) || + get_user(fl->fl_icmp_code, code)) + return -EFAULT; probed = 1; } break; @@ -650,7 +650,8 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) /* check if type field is readable or not. */ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - get_user(fl->fl_mh_type, &p[2 - len]); + if (get_user(fl->fl_mh_type, &p[2 - len])) + return -EFAULT; probed = 1; } else len += iov->iov_len; @@ -664,6 +665,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (probed) break; } + return 0; } static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, @@ -787,7 +789,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = ipv6_fixup_options(&opt_space, opt); fl.proto = proto; - rawv6_probe_proto_opt(&fl, msg); + err = rawv6_probe_proto_opt(&fl, msg); + if (err) + goto out; ipv6_addr_copy(&fl.fl6_dst, daddr); if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) @@ -850,7 +854,8 @@ back_from_confirm: } done: dst_release(dst); - release_sock(sk); + if (!inet->hdrincl) + release_sock(sk); out: fl6_sock_release(flowlabel); return err<0?err:len; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f39bbedd132..6f9a9046510 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -47,6 +47,7 @@ #include <net/snmp.h> #include <net/ipv6.h> +#include <net/ip6_route.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> @@ -76,7 +77,7 @@ struct frag_queue struct hlist_node list; struct list_head lru_list; /* lru list member */ - __u32 id; /* fragment id */ + __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; @@ -124,28 +125,28 @@ static __inline__ void fq_unlink(struct frag_queue *fq) * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, +static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) { u32 a, b, c; - a = saddr->s6_addr32[0]; - b = saddr->s6_addr32[1]; - c = saddr->s6_addr32[2]; + a = (__force u32)saddr->s6_addr32[0]; + b = (__force u32)saddr->s6_addr32[1]; + c = (__force u32)saddr->s6_addr32[2]; a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; c += ip6_frag_hash_rnd; __jhash_mix(a, b, c); - a += saddr->s6_addr32[3]; - b += daddr->s6_addr32[0]; - c += daddr->s6_addr32[1]; + a += (__force u32)saddr->s6_addr32[3]; + b += (__force u32)daddr->s6_addr32[0]; + c += (__force u32)daddr->s6_addr32[1]; __jhash_mix(a, b, c); - a += daddr->s6_addr32[2]; - b += daddr->s6_addr32[3]; - c += id; + a += (__force u32)daddr->s6_addr32[2]; + b += (__force u32)daddr->s6_addr32[3]; + c += (__force u32)id; __jhash_mix(a, b, c); return c & (IP6Q_HASHSZ - 1); @@ -257,7 +258,7 @@ static __inline__ void fq_kill(struct frag_queue *fq) } } -static void ip6_evictor(void) +static void ip6_evictor(struct inet6_dev *idev) { struct frag_queue *fq; struct list_head *tmp; @@ -284,14 +285,14 @@ static void ip6_evictor(void) spin_unlock(&fq->lock); fq_put(fq, &work); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); } } static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq = (struct frag_queue *) data; - struct net_device *dev; + struct net_device *dev = NULL; spin_lock(&fq->lock); @@ -300,17 +301,19 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + dev = dev_get_by_index(fq->iif); + if (!dev) + goto out; + + rcu_read_lock(); + IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ if (!(fq->last_in&FIRST_IN) || !fq->fragments) goto out; - dev = dev_get_by_index(fq->iif); - if (!dev) - goto out; - /* But use as source device on which LAST ARRIVED segment was received. And do not use fq->dev @@ -318,8 +321,9 @@ static void ip6_frag_expire(unsigned long data) */ fq->fragments->dev = dev; icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); - dev_put(dev); out: + if (dev) + dev_put(dev); spin_unlock(&fq->lock); fq_put(fq, NULL); } @@ -366,7 +370,8 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) static struct frag_queue * -ip6_frag_create(u32 id, struct in6_addr *src, struct in6_addr *dst) +ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { struct frag_queue *fq; @@ -386,12 +391,13 @@ ip6_frag_create(u32 id, struct in6_addr *src, struct in6_addr *dst) return ip6_frag_intern(fq); oom: - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); return NULL; } static __inline__ struct frag_queue * -fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { struct frag_queue *fq; struct hlist_node *n; @@ -410,7 +416,7 @@ fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) } read_unlock(&ip6_frag_lock); - return ip6_frag_create(id, src, dst); + return ip6_frag_create(id, src, dst, idev); } @@ -428,7 +434,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw); return; } @@ -455,7 +462,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return; @@ -571,7 +579,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return; err: - IP6_INC_STATS(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); } @@ -665,7 +673,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + rcu_read_lock(); + IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + rcu_read_unlock(); fq->fragments = NULL; return 1; @@ -677,7 +687,9 @@ out_oom: if (net_ratelimit()) printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + rcu_read_lock(); + IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + rcu_read_unlock(); return -1; } @@ -691,16 +703,16 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) hdr = skb->nh.ipv6h; - IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len==0) { - IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) { - IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } @@ -711,16 +723,17 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->h.raw += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; return 1; } if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) - ip6_evictor(); + ip6_evictor(ip6_dst_idev(skb->dst)); - if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) { + if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_dst_idev(skb->dst))) != NULL) { int ret = -1; spin_lock(&fq->lock); @@ -736,7 +749,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) return ret; } - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c953466b7af..8c3d56871b5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -330,6 +330,8 @@ static int inline rt6_check_neigh(struct rt6_info *rt) read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) m = 2; + else if (!(neigh->nud_state & NUD_FAILED)) + m = 1; read_unlock_bh(&neigh->lock); } return m; @@ -347,9 +349,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif n = rt6_check_neigh(rt); - if (n > 1) - m |= 16; - else if (!n && strict & RT6_LOOKUP_F_REACHABLE) + if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) return -1; return m; } @@ -380,10 +380,11 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, continue; if (m > mpri) { - rt6_probe(match); + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(match); match = rt; mpri = m; - } else { + } else if (strict & RT6_LOOKUP_F_REACHABLE) { rt6_probe(rt); } } @@ -439,7 +440,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = htonl(rinfo->lifetime); + lifetime = ntohl(rinfo->lifetime); if (lifetime == 0xffffffff) { /* infinity */ } else if (lifetime > 0x7fffffff/HZ) { @@ -493,7 +494,7 @@ do { \ goto out; \ pn = fn->parent; \ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(pn->subtree, NULL, saddr); \ + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ else \ fn = pn; \ if (fn->fn_flags & RTN_RTINFO) \ @@ -636,7 +637,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_LOOKUP_F_REACHABLE; + int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -710,12 +711,10 @@ void ip6_route_input(struct sk_buff *skb) .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, -#ifdef CONFIG_IPV6_ROUTE_FWMARK - .fwmark = skb->nfmark, -#endif - .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, + .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, }, }, + .mark = skb->mark, .proto = iph->nexthdr, }; @@ -733,7 +732,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_LOOKUP_F_REACHABLE; + int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -941,7 +940,7 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, fib6_force_start_gc(); out: - return (struct dst_entry *)rt; + return &rt->u.dst; } int ndisc_dst_gc(int *more) @@ -1224,7 +1223,7 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free((struct dst_entry *) rt); + dst_free(&rt->u.dst); return err; } @@ -1750,9 +1749,9 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code) { int type = ipv6_addr_type(&skb->nh.ipv6h->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) - IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES); icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); kfree_skb(skb); return 0; @@ -1823,7 +1822,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_LOCAL; rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (rt->rt6i_nexthop == NULL) { - dst_free((struct dst_entry *) rt); + dst_free(&rt->u.dst); return ERR_PTR(-ENOMEM); } @@ -2007,6 +2006,20 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return ip6_route_add(&cfg); } +static inline size_t rt6_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(16) /* RTA_SRC */ + + nla_total_size(16) /* RTA_DST */ + + nla_total_size(16) /* RTA_GATEWAY */ + + nla_total_size(16) /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_TABLE */ + + nla_total_size(4) /* RTA_IIF */ + + nla_total_size(4) /* RTA_OIF */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(sizeof(struct rta_cacheinfo)); +} + static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, @@ -2014,7 +2027,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, { struct rtmsg *rtm; struct nlmsghdr *nlh; - struct rta_cacheinfo ci; + long expires; u32 table; if (prefix) { /* user wants prefix routes only */ @@ -2088,18 +2101,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); - if (rt->rt6i_expires) - ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); - else - ci.rta_expires = 0; - ci.rta_used = rt->u.dst.__use; - ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); - ci.rta_error = rt->u.dst.error; - ci.rta_id = 0; - ci.rta_ts = 0; - ci.rta_tsage = 0; - NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + + expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, + expires, rt->u.dst.error) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2201,7 +2207,6 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) struct sk_buff *skb; u32 pid = 0, seq = 0; struct nlmsghdr *nlh = NULL; - int payload = sizeof(struct rtmsg) + 256; int err = -ENOBUFS; if (info) { @@ -2211,15 +2216,13 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) seq = nlh->nlmsg_seq; } - skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in rt6_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: @@ -2247,7 +2250,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; - int i; if (arg->skip < arg->offset / RT6_INFO_LEN) { arg->skip++; @@ -2257,38 +2259,28 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) if (arg->len >= arg->length) return 0; - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_dst.addr.s6_addr[i]); - arg->len += 2; - } - arg->len += sprintf(arg->buffer + arg->len, " %02x ", + arg->len += sprintf(arg->buffer + arg->len, + NIP6_SEQFMT " %02x ", + NIP6(rt->rt6i_dst.addr), rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_src.addr.s6_addr[i]); - arg->len += 2; - } - arg->len += sprintf(arg->buffer + arg->len, " %02x ", + arg->len += sprintf(arg->buffer + arg->len, + NIP6_SEQFMT " %02x ", + NIP6(rt->rt6i_src.addr), rt->rt6i_src.plen); #else - sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000 00 "); - arg->len += 36; + arg->len += sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - for (i=0; i<16; i++) { - sprintf(arg->buffer + arg->len, "%02x", - rt->rt6i_nexthop->primary_key[i]); - arg->len += 2; - } + arg->len += sprintf(arg->buffer + arg->len, + NIP6_SEQFMT, + NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); } else { - sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000"); - arg->len += 32; + arg->len += sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000"); } arg->len += sprintf(arg->buffer + arg->len, " %08x %08x %08x %08x %8s\n", diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b481a4d780c..77b7b091143 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -60,7 +60,7 @@ */ #define HASH_SIZE 16 -#define HASH(addr) ((addr^(addr>>4))&0xF) +#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static int ipip6_fb_tunnel_init(struct net_device *dev); static int ipip6_tunnel_init(struct net_device *dev); @@ -76,7 +76,7 @@ static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunne static DEFINE_RWLOCK(ipip6_lock); -static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local) +static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local) { unsigned h0 = HASH(remote); unsigned h1 = HASH(local); @@ -102,8 +102,8 @@ static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local) static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t) { - u32 remote = t->parms.iph.daddr; - u32 local = t->parms.iph.saddr; + __be32 remote = t->parms.iph.daddr; + __be32 local = t->parms.iph.saddr; unsigned h = 0; int prio = 0; @@ -144,8 +144,8 @@ static void ipip6_tunnel_link(struct ip_tunnel *t) static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) { - u32 remote = parms->iph.daddr; - u32 local = parms->iph.saddr; + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; struct ip_tunnel *t, **tp, *nt; struct net_device *dev; unsigned h = 0; @@ -405,9 +405,9 @@ out: /* Returns the embedded IPv4 address if the IPv6 address comes from 6to4 (RFC 3056) addr space */ -static inline u32 try_6to4(struct in6_addr *v6dst) +static inline __be32 try_6to4(struct in6_addr *v6dst) { - u32 dst = 0; + __be32 dst = 0; if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ @@ -432,7 +432,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ - u32 dst = tiph->daddr; + __be32 dst = tiph->daddr; int mtu; struct in6_addr *addr6; int addr_type; @@ -809,7 +809,7 @@ static void __exit sit_destroy_tunnels(void) } } -void __exit sit_cleanup(void) +static void __exit sit_cleanup(void) { inet_del_protocol(&sit_protocol, IPPROTO_IPV6); @@ -819,7 +819,7 @@ void __exit sit_cleanup(void) rtnl_unlock(); } -int __init sit_init(void) +static int __init sit_init(void) { int err; @@ -854,3 +854,4 @@ int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS("sit0"); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c2a7c0cafe..c25e930c2c6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -66,10 +66,13 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> + /* Socket used for sending RSTs and ACKs */ static struct socket *tcp6_socket; -static void tcp_v6_send_reset(struct sk_buff *skb); +static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); @@ -78,6 +81,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static struct inet_connection_sock_af_ops ipv6_mapped; static struct inet_connection_sock_af_ops ipv6_specific; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_sock_af_ops tcp_sock_ipv6_specific; +static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; +#endif static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { @@ -98,27 +105,20 @@ static void tcp_v6_hash(struct sock *sk) } } -static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, +static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len, struct in6_addr *saddr, struct in6_addr *daddr, - unsigned long base) + __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } -static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +static __u32 tcp_v6_init_sequence(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IPV6)) { - return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, - skb->nh.ipv6h->saddr.s6_addr32, - skb->h.th->dest, - skb->h.th->source); - } else { - return secure_tcp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, - skb->h.th->dest, - skb->h.th->source); - } + return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, + skb->nh.ipv6h->saddr.s6_addr32, + skb->h.th->dest, + skb->h.th->source); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -215,6 +215,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; +#ifdef CONFIG_TCP_MD5SIG + tp->af_specific = &tcp_sock_ipv6_mapped_specific; +#endif err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); @@ -222,6 +225,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; +#ifdef CONFIG_TCP_MD5SIG + tp->af_specific = &tcp_sock_ipv6_specific; +#endif goto failure; } else { ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), @@ -310,7 +316,7 @@ failure: } static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); @@ -509,8 +515,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); - if (err == NET_XMIT_CN) - err = 0; + err = net_xmit_eval(err); } done: @@ -526,7 +531,396 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet6_rsk(req)->pktopts); } -static struct request_sock_ops tcp6_request_sock_ops = { +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, + struct in6_addr *addr) +{ + struct tcp_sock *tp = tcp_sk(sk); + int i; + + BUG_ON(tp == NULL); + + if (!tp->md5sig_info || !tp->md5sig_info->entries6) + return NULL; + + for (i = 0; i < tp->md5sig_info->entries6; i++) { + if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, addr) == 0) + return (struct tcp_md5sig_key *)&tp->md5sig_info->keys6[i]; + } + return NULL; +} + +static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, + struct sock *addr_sk) +{ + return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); +} + +static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, + struct request_sock *req) +{ + return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); +} + +static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, + char *newkey, u8 newkeylen) +{ + /* Add key to the list */ + struct tcp6_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + struct tcp6_md5sig_key *keys; + + key = (struct tcp6_md5sig_key*) tcp_v6_md5_do_lookup(sk, peer); + if (key) { + /* modify existing entry - just update that one */ + kfree(key->key); + key->key = newkey; + key->keylen = newkeylen; + } else { + /* reallocate new list if current one is full. */ + if (!tp->md5sig_info) { + tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC); + if (!tp->md5sig_info) { + kfree(newkey); + return -ENOMEM; + } + } + tcp_alloc_md5sig_pool(); + if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { + keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) * + (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); + + if (!keys) { + tcp_free_md5sig_pool(); + kfree(newkey); + return -ENOMEM; + } + + if (tp->md5sig_info->entries6) + memmove(keys, tp->md5sig_info->keys6, + (sizeof (tp->md5sig_info->keys6[0]) * + tp->md5sig_info->entries6)); + + kfree(tp->md5sig_info->keys6); + tp->md5sig_info->keys6 = keys; + tp->md5sig_info->alloced6++; + } + + ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr, + peer); + tp->md5sig_info->keys6[tp->md5sig_info->entries6].key = newkey; + tp->md5sig_info->keys6[tp->md5sig_info->entries6].keylen = newkeylen; + + tp->md5sig_info->entries6++; + } + return 0; +} + +static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk, + u8 *newkey, __u8 newkeylen) +{ + return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr, + newkey, newkeylen); +} + +static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer) +{ + struct tcp_sock *tp = tcp_sk(sk); + int i; + + for (i = 0; i < tp->md5sig_info->entries6; i++) { + if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, peer) == 0) { + /* Free the key */ + kfree(tp->md5sig_info->keys6[i].key); + tp->md5sig_info->entries6--; + + if (tp->md5sig_info->entries6 == 0) { + kfree(tp->md5sig_info->keys6); + tp->md5sig_info->keys6 = NULL; + + tcp_free_md5sig_pool(); + + return 0; + } else { + /* shrink the database */ + if (tp->md5sig_info->entries6 != i) + memmove(&tp->md5sig_info->keys6[i], + &tp->md5sig_info->keys6[i+1], + (tp->md5sig_info->entries6 - i) + * sizeof (tp->md5sig_info->keys6[0])); + } + } + } + return -ENOENT; +} + +static void tcp_v6_clear_md5_list (struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int i; + + if (tp->md5sig_info->entries6) { + for (i = 0; i < tp->md5sig_info->entries6; i++) + kfree(tp->md5sig_info->keys6[i].key); + tp->md5sig_info->entries6 = 0; + tcp_free_md5sig_pool(); + } + + kfree(tp->md5sig_info->keys6); + tp->md5sig_info->keys6 = NULL; + tp->md5sig_info->alloced6 = 0; + + if (tp->md5sig_info->entries4) { + for (i = 0; i < tp->md5sig_info->entries4; i++) + kfree(tp->md5sig_info->keys4[i].key); + tp->md5sig_info->entries4 = 0; + tcp_free_md5sig_pool(); + } + + kfree(tp->md5sig_info->keys4); + tp->md5sig_info->keys4 = NULL; + tp->md5sig_info->alloced4 = 0; +} + +static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, + int optlen) +{ + struct tcp_md5sig cmd; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + u8 *newkey; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + if (copy_from_user(&cmd, optval, sizeof(cmd))) + return -EFAULT; + + if (sin6->sin6_family != AF_INET6) + return -EINVAL; + + if (!cmd.tcpm_keylen) { + if (!tcp_sk(sk)->md5sig_info) + return -ENOENT; + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); + return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); + } + + if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) + return -EINVAL; + + if (!tcp_sk(sk)->md5sig_info) { + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_info *p; + + p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL); + if (!p) + return -ENOMEM; + + tp->md5sig_info = p; + } + + newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + if (!newkey) + return -ENOMEM; + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) { + return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], + newkey, cmd.tcpm_keylen); + } + return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen); +} + +static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + struct in6_addr *saddr, + struct in6_addr *daddr, + struct tcphdr *th, int protocol, + int tcplen) +{ + struct scatterlist sg[4]; + __u16 data_len; + int block = 0; + __sum16 cksum; + struct tcp_md5sig_pool *hp; + struct tcp6_pseudohdr *bp; + struct hash_desc *desc; + int err; + unsigned int nbytes = 0; + + hp = tcp_get_md5sig_pool(); + if (!hp) { + printk(KERN_WARNING "%s(): hash pool not found...\n", __FUNCTION__); + goto clear_hash_noput; + } + bp = &hp->md5_blk.ip6; + desc = &hp->md5_desc; + + /* 1. TCP pseudo-header (RFC2460) */ + ipv6_addr_copy(&bp->saddr, saddr); + ipv6_addr_copy(&bp->daddr, daddr); + bp->len = htonl(tcplen); + bp->protocol = htonl(protocol); + + sg_set_buf(&sg[block++], bp, sizeof(*bp)); + nbytes += sizeof(*bp); + + /* 2. TCP header, excluding options */ + cksum = th->check; + th->check = 0; + sg_set_buf(&sg[block++], th, sizeof(*th)); + nbytes += sizeof(*th); + + /* 3. TCP segment data (if any) */ + data_len = tcplen - (th->doff << 2); + if (data_len > 0) { + u8 *data = (u8 *)th + (th->doff << 2); + sg_set_buf(&sg[block++], data, data_len); + nbytes += data_len; + } + + /* 4. shared key */ + sg_set_buf(&sg[block++], key->key, key->keylen); + nbytes += key->keylen; + + /* Now store the hash into the packet */ + err = crypto_hash_init(desc); + if (err) { + printk(KERN_WARNING "%s(): hash_init failed\n", __FUNCTION__); + goto clear_hash; + } + err = crypto_hash_update(desc, sg, nbytes); + if (err) { + printk(KERN_WARNING "%s(): hash_update failed\n", __FUNCTION__); + goto clear_hash; + } + err = crypto_hash_final(desc, md5_hash); + if (err) { + printk(KERN_WARNING "%s(): hash_final failed\n", __FUNCTION__); + goto clear_hash; + } + + /* Reset header, and free up the crypto */ + tcp_put_md5sig_pool(); + th->check = cksum; +out: + return 0; +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + goto out; +} + +static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, + struct dst_entry *dst, + struct request_sock *req, + struct tcphdr *th, int protocol, + int tcplen) +{ + struct in6_addr *saddr, *daddr; + + if (sk) { + saddr = &inet6_sk(sk)->saddr; + daddr = &inet6_sk(sk)->daddr; + } else { + saddr = &inet6_rsk(req)->loc_addr; + daddr = &inet6_rsk(req)->rmt_addr; + } + return tcp_v6_do_calc_md5_hash(md5_hash, key, + saddr, daddr, + th, protocol, tcplen); +} + +static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) +{ + __u8 *hash_location = NULL; + struct tcp_md5sig_key *hash_expected; + struct ipv6hdr *ip6h = skb->nh.ipv6h; + struct tcphdr *th = skb->h.th; + int length = (th->doff << 2) - sizeof (*th); + int genhash; + u8 *ptr; + u8 newhash[16]; + + hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); + + /* If the TCP option is too short, we can short cut */ + if (length < TCPOLEN_MD5SIG) + return hash_expected ? 1 : 0; + + /* parse options */ + ptr = (u8*)(th + 1); + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch(opcode) { + case TCPOPT_EOL: + goto done_opts; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + goto done_opts; + if (opcode == TCPOPT_MD5SIG) { + hash_location = ptr; + goto done_opts; + } + } + ptr += opsize - 2; + length -= opsize; + } + +done_opts: + /* do we have a hash as expected? */ + if (!hash_expected) { + if (!hash_location) + return 0; + if (net_ratelimit()) { + printk(KERN_INFO "MD5 Hash NOT expected but found " + "(" NIP6_FMT ", %u)->" + "(" NIP6_FMT ", %u)\n", + NIP6(ip6h->saddr), ntohs(th->source), + NIP6(ip6h->daddr), ntohs(th->dest)); + } + return 1; + } + + if (!hash_location) { + if (net_ratelimit()) { + printk(KERN_INFO "MD5 Hash expected but NOT found " + "(" NIP6_FMT ", %u)->" + "(" NIP6_FMT ", %u)\n", + NIP6(ip6h->saddr), ntohs(th->source), + NIP6(ip6h->daddr), ntohs(th->dest)); + } + return 1; + } + + /* check the signature */ + genhash = tcp_v6_do_calc_md5_hash(newhash, + hash_expected, + &ip6h->saddr, &ip6h->daddr, + th, sk->sk_protocol, + skb->len); + if (genhash || memcmp(hash_location, newhash, 16) != 0) { + if (net_ratelimit()) { + printk(KERN_INFO "MD5 Hash %s for " + "(" NIP6_FMT ", %u)->" + "(" NIP6_FMT ", %u)\n", + genhash ? "failed" : "mismatch", + NIP6(ip6h->saddr), ntohs(th->source), + NIP6(ip6h->daddr), ntohs(th->dest)); + } + return 1; + } + return 0; +} +#endif + +static struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .rtx_syn_ack = tcp_v6_send_synack, @@ -535,9 +929,16 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .md5_lookup = tcp_v6_reqsk_md5_lookup, +}; +#endif + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, }; static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) @@ -547,7 +948,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, csum_partial((char *)th, th->doff<<2, @@ -569,16 +970,20 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, IPPROTO_TCP, 0); - skb->csum = offsetof(struct tcphdr, check); + skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; } -static void tcp_v6_send_reset(struct sk_buff *skb) +static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th, *t1; struct sk_buff *buff; struct flowi fl; + int tot_len = sizeof(*th); +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; +#endif if (th->rst) return; @@ -586,25 +991,35 @@ static void tcp_v6_send_reset(struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) return; +#ifdef CONFIG_TCP_MD5SIG + if (sk) + key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr); + else + key = NULL; + + if (key) + tot_len += TCPOLEN_MD5SIG_ALIGNED; +#endif + /* * We need to grab some memory, and put together an RST, * and then put it into the queue to be sent. */ - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr), + buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); if (buff == NULL) return; - skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)); + skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); - t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr)); + t1 = (struct tcphdr *) skb_push(buff, tot_len); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); t1->dest = th->source; t1->source = th->dest; - t1->doff = sizeof(*t1)/4; + t1->doff = tot_len / 4; t1->rst = 1; if(th->ack) { @@ -615,6 +1030,22 @@ static void tcp_v6_send_reset(struct sk_buff *skb) + skb->len - (th->doff<<2)); } +#ifdef CONFIG_TCP_MD5SIG + if (key) { + __be32 *opt = (__be32*)(t1 + 1); + opt[0] = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + tcp_v6_do_calc_md5_hash((__u8*)&opt[1], + key, + &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr, + t1, IPPROTO_TCP, + tot_len); + } +#endif + buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); memset(&fl, 0, sizeof(fl)); @@ -645,15 +1076,37 @@ static void tcp_v6_send_reset(struct sk_buff *skb) kfree_skb(buff); } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) { struct tcphdr *th = skb->h.th, *t1; struct sk_buff *buff; struct flowi fl; int tot_len = sizeof(struct tcphdr); + __be32 *topt; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; + struct tcp_md5sig_key tw_key; +#endif + +#ifdef CONFIG_TCP_MD5SIG + if (!tw && skb->sk) { + key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr); + } else if (tw && tw->tw_md5_keylen) { + tw_key.key = tw->tw_md5_key; + tw_key.keylen = tw->tw_md5_keylen; + key = &tw_key; + } else { + key = NULL; + } +#endif if (ts) tot_len += TCPOLEN_TSTAMP_ALIGNED; +#ifdef CONFIG_TCP_MD5SIG + if (key) + tot_len += TCPOLEN_MD5SIG_ALIGNED; +#endif buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); @@ -673,15 +1126,29 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 t1->ack_seq = htonl(ack); t1->ack = 1; t1->window = htons(win); + + topt = (__be32 *)(t1 + 1); if (ts) { - u32 *ptr = (u32*)(t1 + 1); - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tcp_time_stamp); - *ptr = htonl(ts); + *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *topt++ = htonl(tcp_time_stamp); + *topt = htonl(ts); } +#ifdef CONFIG_TCP_MD5SIG + if (key) { + *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); + tcp_v6_do_calc_md5_hash((__u8 *)topt, + key, + &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr, + t1, IPPROTO_TCP, + tot_len); + } +#endif + buff->csum = csum_partial((char *)t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); @@ -712,9 +1179,9 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); @@ -723,7 +1190,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent); + tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent); } @@ -794,6 +1261,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; +#ifdef CONFIG_TCP_MD5SIG + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; +#endif + tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; @@ -822,7 +1293,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->iif = inet6_iif(skb); if (isn == 0) - isn = tcp_v6_init_sequence(sk,skb); + isn = tcp_v6_init_sequence(skb); tcp_rsk(req)->snt_isn = isn; @@ -852,6 +1323,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct tcp_sock *newtp; struct sock *newsk; struct ipv6_txoptions *opt; +#ifdef CONFIG_TCP_MD5SIG + struct tcp_md5sig_key *key; +#endif if (skb->protocol == htons(ETH_P_IP)) { /* @@ -882,6 +1356,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; +#ifdef CONFIG_TCP_MD5SIG + newtp->af_specific = &tcp_sock_ipv6_mapped_specific; +#endif + newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1016,6 +1494,21 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; +#ifdef CONFIG_TCP_MD5SIG + /* Copy over the MD5 key from the original socket */ + if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { + /* We're using one, so create a matching key + * on the newsk structure. If we fail to get + * memory, then we end up not copying the key + * across. Shucks. + */ + char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); + if (newkey != NULL) + tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr, + newkey, key->keylen); + } +#endif + __inet6_hash(&tcp_hashinfo, newsk); inet_inherit_port(&tcp_hashinfo, sk, newsk); @@ -1031,7 +1524,7 @@ out: return NULL; } -static int tcp_v6_checksum_init(struct sk_buff *skb) +static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, @@ -1041,8 +1534,8 @@ static int tcp_v6_checksum_init(struct sk_buff *skb) } } - skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, 0); + skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, 0)); if (skb->len <= 76) { return __skb_checksum_complete(skb); @@ -1075,6 +1568,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); +#ifdef CONFIG_TCP_MD5SIG + if (tcp_v6_inbound_md5_hash (sk, skb)) + goto discard; +#endif + if (sk_filter(sk, skb)) goto discard; @@ -1140,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v6_send_reset(skb); + tcp_v6_send_reset(sk, skb); discard: if (opt_skb) __kfree_skb(opt_skb); @@ -1265,7 +1763,7 @@ no_tcp_socket: bad_packet: TCP_INC_STATS_BH(TCP_MIB_INERRS); } else { - tcp_v6_send_reset(skb); + tcp_v6_send_reset(NULL, skb); } discard_it: @@ -1344,6 +1842,15 @@ static struct inet_connection_sock_af_ops ipv6_specific = { #endif }; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_sock_af_ops tcp_sock_ipv6_specific = { + .md5_lookup = tcp_v6_md5_lookup, + .calc_md5_hash = tcp_v6_calc_md5_hash, + .md5_add = tcp_v6_md5_add_func, + .md5_parse = tcp_v6_parse_md5_keys, +}; +#endif + /* * TCP over IPv4 via INET6 API */ @@ -1366,6 +1873,15 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { #endif }; +#ifdef CONFIG_TCP_MD5SIG +static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { + .md5_lookup = tcp_v4_md5_lookup, + .calc_md5_hash = tcp_v4_calc_md5_hash, + .md5_add = tcp_v6_md5_add_func, + .md5_parse = tcp_v6_parse_md5_keys, +}; +#endif + /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ @@ -1405,6 +1921,10 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); +#ifdef CONFIG_TCP_MD5SIG + tp->af_specific = &tcp_sock_ipv6_specific; +#endif + sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1415,6 +1935,11 @@ static int tcp_v6_init_sock(struct sock *sk) static int tcp_v6_destroy_sock(struct sock *sk) { +#ifdef CONFIG_TCP_MD5SIG + /* Clean up the MD5 key list */ + if (tcp_sk(sk)->md5sig_info) + tcp_v6_clear_md5_list(sk); +#endif tcp_v4_destroy_sock(sk); return inet6_destroy_sock(sk); } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 0ef9a35798d..918d07dd121 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -104,7 +104,7 @@ drop: } static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct xfrm6_tunnel *handler; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e0c3934a7e4..f52a5c3cc0a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -38,26 +38,18 @@ #include <linux/skbuff.h> #include <asm/uaccess.h> -#include <net/sock.h> -#include <net/snmp.h> - -#include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_route.h> -#include <net/addrconf.h> -#include <net/ip.h> -#include <net/udp.h> #include <net/raw.h> -#include <net/inet_common.h> #include <net/tcp_states.h> - #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include "udp_impl.h" DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; @@ -66,23 +58,9 @@ static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } -static void udp_v6_hash(struct sock *sk) -{ - BUG(); -} - -static void udp_v6_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_dec_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); -} - -static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, int dif) +static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -90,7 +68,7 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && sk->sk_family == PF_INET6) { @@ -132,20 +110,11 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, } /* - * - */ - -static void udpv6_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -/* * This should be easy, if there is something there we * return it, otherwise we block. */ -static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { @@ -153,7 +122,7 @@ static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; size_t copied; - int err; + int err, copy_only, is_udplite = IS_UDPLITE(sk); if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -172,15 +141,21 @@ try_again: msg->msg_flags |= MSG_TRUNC; } - if (skb->ip_summed==CHECKSUM_UNNECESSARY) { - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else if (msg->msg_flags&MSG_TRUNC) { - if (__skb_checksum_complete(skb)) + /* + * Decide whether to checksum and/or copy data. + */ + copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); + + if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { + if (__udp_lib_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, - copied); - } else { + copy_only = 1; + } + + if (copy_only) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; @@ -231,26 +206,26 @@ csum_copy_err: skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { - UDP6_INC_STATS_USER(UDP_MIB_INERRORS); + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); return -EAGAIN; } goto try_again; } -static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) +void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info, + struct hlist_head udptable[] ) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct net_device *dev = skb->dev; struct in6_addr *saddr = &hdr->saddr; struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr*)(skb->data+offset); struct sock *sk; int err; - sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex); - + sk = __udp6_lib_lookup(daddr, uh->dest, + saddr, uh->source, inet6_iif(skb), udptable); if (sk == NULL) return; @@ -271,36 +246,60 @@ out: sock_put(sk); } -static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static __inline__ void udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, int type, + int code, int offset, __be32 info ) +{ + return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); +} + +int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { + struct udp_sock *up = udp_sk(sk); int rc; - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return -1; - } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; - if (skb_checksum_complete(skb)) { - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return 0; + /* + * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). + */ + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" + " %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " + "too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } } + if (udp_lib_checksum_complete(skb)) + goto drop; + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); - kfree_skb(skb); - return 0; + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + goto drop; } - UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS); + UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return 0; +drop: + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + kfree_skb(skb); + return -1; } static struct sock *udp_v6_mcast_next(struct sock *sk, - u16 loc_port, struct in6_addr *loc_addr, - u16 rmt_port, struct in6_addr *rmt_addr, + __be16 loc_port, struct in6_addr *loc_addr, + __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { struct hlist_node *node; @@ -339,16 +338,16 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static void udpv6_mcast_deliver(struct udphdr *uh, - struct in6_addr *saddr, struct in6_addr *daddr, - struct sk_buff *skb) +static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, + struct in6_addr *daddr, struct hlist_head udptable[]) { struct sock *sk, *sk2; + const struct udphdr *uh = skb->h.uh; int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = skb->dev->ifindex; + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { kfree_skb(skb); @@ -365,9 +364,35 @@ static void udpv6_mcast_deliver(struct udphdr *uh, udpv6_queue_rcv_skb(sk, skb); out: read_unlock(&udp_hash_lock); + return 0; +} + +static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) + +{ + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDP, skb->csum )) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDP, + 0)); + + return (UDP_SKB_CB(skb)->partial_cov = 0); } -static int udpv6_rcv(struct sk_buff **pskb) +int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], + int is_udplite) { struct sk_buff *skb = *pskb; struct sock *sk; @@ -384,44 +409,39 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; - /* Check for jumbo payload */ - if (ulen == 0) - ulen = skb->len; + if(! is_udplite ) { /* UDP validates ulen. */ - if (ulen > skb->len || ulen < sizeof(*uh)) - goto short_packet; + /* Check for jumbo payload */ + if (ulen == 0) + ulen = skb->len; - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - goto discard; - } + if (ulen < sizeof(*uh)) + goto short_packet; - if (ulen < skb->len) { - if (pskb_trim_rcsum(skb, ulen)) - goto discard; - saddr = &skb->nh.ipv6h->saddr; - daddr = &skb->nh.ipv6h->daddr; - uh = skb->h.uh; - } + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) + goto short_packet; + saddr = &skb->nh.ipv6h->saddr; + daddr = &skb->nh.ipv6h->daddr; + uh = skb->h.uh; + } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (udp6_csum_init(skb, uh)) + goto discard; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0); + } else { /* UDP-Lite validates cscov. */ + if (udplite6_csum_init(skb, uh)) + goto discard; + } /* * Multicast receive code */ - if (ipv6_addr_is_multicast(daddr)) { - udpv6_mcast_deliver(uh, saddr, daddr, skb); - return 0; - } + if (ipv6_addr_is_multicast(daddr)) + return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); /* Unicast */ @@ -429,15 +449,16 @@ static int udpv6_rcv(struct sk_buff **pskb) * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex); + sk = __udp6_lib_lookup(saddr, uh->source, + daddr, uh->dest, inet6_iif(skb), udptable); if (sk == NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - if (skb_checksum_complete(skb)) + if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -452,14 +473,20 @@ static int udpv6_rcv(struct sk_buff **pskb) return(0); short_packet: - if (net_ratelimit()) - printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len); + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + is_udplite? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return(0); } + +static __inline__ int udpv6_rcv(struct sk_buff **pskb) +{ + return __udp6_lib_rcv(pskb, udp_hash, 0); +} + /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -478,13 +505,15 @@ static void udp_v6_flush_pending_frames(struct sock *sk) * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) +static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udphdr *uh; + struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; + __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) @@ -499,35 +528,17 @@ static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up) uh->len = htons(up->len); uh->check = 0; - if (sk->sk_no_check == UDP_CSUM_NOXMIT) { - skb->ip_summed = CHECKSUM_NONE; - goto send; - } - - if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)uh, - sizeof(struct udphdr), skb->csum); - uh->check = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - up->len, fl->proto, skb->csum); - } else { - u32 tmp_csum = 0; + if (up->pcflag) + csum = udplite_csum_outgoing(sk, skb); + else + csum = udp_csum_outgoing(sk, skb); - skb_queue_walk(&sk->sk_write_queue, skb) { - tmp_csum = csum_add(tmp_csum, skb->csum); - } - tmp_csum = csum_partial((char *)uh, - sizeof(struct udphdr), tmp_csum); - tmp_csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - up->len, fl->proto, tmp_csum); - uh->check = tmp_csum; - - } + /* add protocol-dependent pseudo-header */ + uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, + up->len, fl->proto, csum ); if (uh->check == 0) - uh->check = -1; + uh->check = CSUM_MANGLED_0; -send: err = ip6_push_pending_frames(sk); out: up->len = 0; @@ -535,7 +546,7 @@ out: return err; } -static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; @@ -555,6 +566,8 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; + int is_udplite = up->pcflag; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); /* destination address check */ if (sin6) { @@ -695,7 +708,7 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = IPPROTO_UDP; + fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, daddr); if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -762,14 +775,15 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) - err = udp_v6_push_pending_frames(sk, up); + err = udp_v6_push_pending_frames(sk); else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; @@ -794,7 +808,7 @@ do_append_data: out: fl6_sock_release(flowlabel); if (!err) { - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return len; } /* @@ -805,7 +819,7 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -817,7 +831,7 @@ do_confirm: goto out; } -static int udpv6_destroy_sock(struct sock *sk) +int udpv6_destroy_sock(struct sock *sk) { lock_sock(sk); udp_v6_flush_pending_frames(sk); @@ -831,119 +845,41 @@ static int udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -static int do_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - struct udp_sock *up = udp_sk(sk); - int val; - int err = 0; - - if(optlen<sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - switch(optname) { - case UDP_CORK: - if (val != 0) { - up->corkflag = 1; - } else { - up->corkflag = 0; - lock_sock(sk); - udp_v6_push_pending_frames(sk, up); - release_sock(sk); - } - break; - - case UDP_ENCAP: - switch (val) { - case 0: - up->encap_type = val; - break; - default: - err = -ENOPROTOOPT; - break; - } - break; - - default: - err = -ENOPROTOOPT; - break; - }; - - return err; -} - -static int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return ipv6_setsockopt(sk, level, optname, optval, optlen); - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); + return ipv6_setsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) { - if (level != SOL_UDP) - return compat_ipv6_setsockopt(sk, level, optname, - optval, optlen); - return do_udpv6_setsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); + return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); } #endif -static int do_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct udp_sock *up = udp_sk(sk); - int val, len; - - if(get_user(len,optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - if(len < 0) - return -EINVAL; - - switch(optname) { - case UDP_CORK: - val = up->corkflag; - break; - - case UDP_ENCAP: - val = up->encap_type; - break; - - default: - return -ENOPROTOOPT; - }; - - if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval, &val,len)) - return -EFAULT; - return 0; -} - -static int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return ipv6_getsockopt(sk, level, optname, optval, optlen); - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ipv6_getsockopt(sk, level, optname, optval, optlen); } #ifdef CONFIG_COMPAT -static int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level != SOL_UDP) - return compat_ipv6_getsockopt(sk, level, optname, - optval, optlen); - return do_udpv6_getsockopt(sk, level, optname, optval, optlen); + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } #endif @@ -984,7 +920,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket atomic_read(&sp->sk_refcnt), sp); } -static int udp6_seq_show(struct seq_file *seq, void *v) +int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, @@ -1003,6 +939,7 @@ static struct udp_seq_afinfo udp6_seq_afinfo = { .owner = THIS_MODULE, .name = "udp6", .family = AF_INET6, + .hashtable = udp_hash, .seq_show = udp6_seq_show, .seq_fops = &udp6_seq_fops, }; @@ -1022,7 +959,7 @@ void udp6_proc_exit(void) { struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, - .close = udpv6_close, + .close = udp_lib_close, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, @@ -1032,8 +969,8 @@ struct proto udpv6_prot = { .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, .backlog_rcv = udpv6_queue_rcv_skb, - .hash = udp_v6_hash, - .unhash = udp_v6_unhash, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), #ifdef CONFIG_COMPAT diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h new file mode 100644 index 00000000000..ec987889912 --- /dev/null +++ b/net/ipv6/udp_impl.h @@ -0,0 +1,34 @@ +#ifndef _UDP6_IMPL_H +#define _UDP6_IMPL_H +#include <net/udp.h> +#include <net/udplite.h> +#include <net/protocol.h> +#include <net/addrconf.h> +#include <net/inet_common.h> + +extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); +extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, + int , int , int , __be32 , struct hlist_head []); + +extern int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +extern int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +#ifdef CONFIG_COMPAT +extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen); +extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +#endif +extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len); +extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len); +extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); +extern int udpv6_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int udp6_seq_show(struct seq_file *seq, void *v); +#endif +#endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c new file mode 100644 index 00000000000..629f97162fb --- /dev/null +++ b/net/ipv6/udplite.c @@ -0,0 +1,105 @@ +/* + * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. + * See also net/ipv4/udplite.c + * + * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ + * + * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" + +DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; + +static int udplitev6_rcv(struct sk_buff **pskb) +{ + return __udp6_lib_rcv(pskb, udplite_hash, 1); +} + +static void udplitev6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info) +{ + return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); +} + +static struct inet6_protocol udplitev6_protocol = { + .handler = udplitev6_rcv, + .err_handler = udplitev6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static int udplite_v6_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); +} + +struct proto udplitev6_prot = { + .name = "UDPLITEv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .backlog_rcv = udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v6_get_port, + .obj_size = sizeof(struct udp6_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, +#endif +}; + +static struct inet_protosw udplite6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplitev6_prot, + .ops = &inet6_dgram_ops, + .capability = -1, + .no_check = 0, + .flags = INET_PROTOSW_PERMANENT, +}; + +void __init udplitev6_init(void) +{ + if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) + printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__); + + inet6_register_protosw(&udplite6_protosw); +} + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite6_seq_fops; +static struct udp_seq_afinfo udplite6_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite6", + .family = AF_INET6, + .hashtable = udplite_hash, + .seq_show = udp6_seq_show, + .seq_fops = &udplite6_seq_fops, +}; + +int __init udplite6_proc_init(void) +{ + return udp_proc_register(&udplite6_seq_afinfo); +} + +void udplite6_proc_exit(void) +{ + udp_proc_unregister(&udplite6_seq_afinfo); +} +#endif diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d400f8fae12..8dffd4daae9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -274,11 +274,12 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) break; case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) { - u16 *ports = (u16 *)exthdr; + __be16 *ports = (__be16 *)exthdr; fl->fl_ip_sport = ports[0]; fl->fl_ip_dport = ports[1]; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 7af227bb155..12e426b9aac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -50,7 +50,7 @@ static u32 xfrm6_tunnel_spi; #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff -static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; +static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 @@ -62,7 +62,7 @@ static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) { unsigned h; - h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]; + h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; @@ -126,7 +126,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) return NULL; } -u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; @@ -135,7 +135,7 @@ u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) x6spi = __xfrm6_tunnel_spi_lookup(saddr); spi = x6spi ? x6spi->spi : 0; read_unlock_bh(&xfrm6_tunnel_spi_lock); - return spi; + return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); @@ -180,7 +180,7 @@ try_next_2:; spi = 0; goto out; alloc_spi: - x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC); + x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; @@ -196,7 +196,7 @@ out: return spi; } -u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; @@ -210,7 +210,7 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) spi = __xfrm6_tunnel_alloc_spi(saddr); write_unlock_bh(&xfrm6_tunnel_spi_lock); - return spi; + return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); @@ -265,7 +265,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { /* xfrm6_tunnel native err handling */ switch (type) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index bef3f61569f..76c661566df 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -83,13 +83,13 @@ DEFINE_SPINLOCK(ipx_interfaces_lock); struct ipx_interface *ipx_primary_net; struct ipx_interface *ipx_internal_net; -extern int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc, +extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, unsigned char *node); extern void ipxrtr_del_routes(struct ipx_interface *intrfc); extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, struct iovec *iov, int len, int noblock); extern int ipxrtr_route_skb(struct sk_buff *skb); -extern struct ipx_route *ipxrtr_lookup(__u32 net); +extern struct ipx_route *ipxrtr_lookup(__be32 net); extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); #undef IPX_REFCNT_DEBUG @@ -177,7 +177,7 @@ static void ipxitf_clear_primary_net(void) } static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev, - unsigned short datalink) + __be16 datalink) { struct ipx_interface *i; @@ -190,7 +190,7 @@ out: } static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev, - unsigned short datalink) + __be16 datalink) { struct ipx_interface *i; @@ -202,7 +202,7 @@ static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev, return i; } -struct ipx_interface *ipxitf_find_using_net(__u32 net) +struct ipx_interface *ipxitf_find_using_net(__be32 net) { struct ipx_interface *i; @@ -237,7 +237,7 @@ static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk) /* caller must hold intrfc->if_sklist_lock */ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, - unsigned short port) + __be16 port) { struct sock *s; struct hlist_node *node; @@ -252,7 +252,7 @@ found: /* caller must hold a reference to intrfc */ static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc, - unsigned short port) + __be16 port) { struct sock *s; @@ -268,7 +268,7 @@ static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc, #ifdef CONFIG_IPX_INTERN static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, unsigned char *ipx_node, - unsigned short port) + __be16 port) { struct sock *s; struct hlist_node *node; @@ -600,10 +600,10 @@ int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node) /* see if we need to include the netnum in the route list */ if (IPX_SKB_CB(skb)->last_hop.index >= 0) { - u32 *last_hop = (u32 *)(((u8 *) skb->data) + + __be32 *last_hop = (__be32 *)(((u8 *) skb->data) + sizeof(struct ipxhdr) + IPX_SKB_CB(skb)->last_hop.index * - sizeof(u32)); + sizeof(__be32)); *last_hop = IPX_SKB_CB(skb)->last_hop.netnum; IPX_SKB_CB(skb)->last_hop.index = -1; } @@ -772,7 +772,7 @@ static void ipxitf_discover_netnum(struct ipx_interface *intrfc, } else { printk(KERN_WARNING "IPX: Network number collision " "%lx\n %s %s and %s %s\n", - (unsigned long) htonl(cb->ipx_source_net), + (unsigned long) ntohl(cb->ipx_source_net), ipx_device_name(i), ipx_frame_name(i->if_dlink_type), ipx_device_name(intrfc), @@ -812,7 +812,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) int i, rc = -EINVAL; struct ipx_interface *ifcs; char *c; - u32 *l; + __be32 *l; /* Illegal packet - too many hops or too short */ /* We decide to throw it away: no broadcasting, no local processing. @@ -833,7 +833,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) goto out; c = ((u8 *) ipx) + sizeof(struct ipxhdr); - l = (u32 *) c; + l = (__be32 *) c; /* Don't broadcast packet if already seen this net */ for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++) @@ -855,7 +855,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) /* That aren't in the list */ if (ifcs == intrfc) continue; - l = (__u32 *) c; + l = (__be32 *) c; /* don't consider the last entry in the packet list, * it is our netnum, and it is not there yet */ for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++) @@ -885,8 +885,8 @@ static void ipxitf_insert(struct ipx_interface *intrfc) ipx_primary_net = intrfc; } -static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __u32 netnum, - unsigned short dlink_type, +static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum, + __be16 dlink_type, struct datalink_proto *dlink, unsigned char internal, int ipx_offset) @@ -960,7 +960,7 @@ static __be16 ipx_map_frame_type(unsigned char type) static int ipxitf_create(struct ipx_interface_definition *idef) { struct net_device *dev; - unsigned short dlink_type = 0; + __be16 dlink_type = 0; struct datalink_proto *datalink = NULL; struct ipx_interface *intrfc; int rc; @@ -1073,7 +1073,7 @@ out: static int ipxitf_delete(struct ipx_interface_definition *idef) { struct net_device *dev = NULL; - unsigned short dlink_type = 0; + __be16 dlink_type = 0; struct ipx_interface *intrfc; int rc = 0; @@ -1110,7 +1110,7 @@ out: } static struct ipx_interface *ipxitf_auto_create(struct net_device *dev, - unsigned short dlink_type) + __be16 dlink_type) { struct ipx_interface *intrfc = NULL; struct datalink_proto *datalink; @@ -1122,7 +1122,7 @@ static struct ipx_interface *ipxitf_auto_create(struct net_device *dev, if (dev->addr_len > IPX_NODE_LEN) goto out; - switch (htons(dlink_type)) { + switch (ntohs(dlink_type)) { case ETH_P_IPX: datalink = pEII_datalink; break; case ETH_P_802_2: datalink = p8022_datalink; break; case ETH_P_SNAP: datalink = pSNAP_datalink; break; @@ -1234,27 +1234,27 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) /* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */ /* This functions should *not* mess with packet contents */ -__u16 ipx_cksum(struct ipxhdr *packet, int length) +__be16 ipx_cksum(struct ipxhdr *packet, int length) { /* * NOTE: sum is a net byte order quantity, which optimizes the * loop. This only works on big and little endian machines. (I * don't know of a machine that isn't.) */ - /* start at ipx_dest - We skip the checksum field and start with - * ipx_type before the loop, not considering ipx_tctrl in the calc */ - __u16 *p = (__u16 *)&packet->ipx_dest; - __u32 i = (length >> 1) - 1; /* Number of complete words */ - __u32 sum = packet->ipx_type << sizeof(packet->ipx_tctrl); - - /* Loop through all complete words except the checksum field, - * ipx_type (accounted above) and ipx_tctrl (not used in the cksum) */ - while (--i) + /* handle the first 3 words separately; checksum should be skipped + * and ipx_tctrl masked out */ + __u16 *p = (__u16 *)packet; + __u32 sum = p[1] + (p[2] & (__force u16)htons(0x00ff)); + __u32 i = (length >> 1) - 3; /* Number of remaining complete words */ + + /* Loop through them */ + p += 3; + while (i--) sum += *p++; /* Add on the last part word if it exists */ if (packet->ipx_pktsize & htons(1)) - sum += ntohs(0xff00) & *p; + sum += (__force u16)htons(0xff00) & *p; /* Do final fixup */ sum = (sum & 0xffff) + (sum >> 16); @@ -1263,10 +1263,17 @@ __u16 ipx_cksum(struct ipxhdr *packet, int length) if (sum >= 0x10000) sum++; - return ~sum; + /* + * Leave 0 alone; we don't want 0xffff here. Note that we can't get + * here with 0x10000, so this check is the same as ((__u16)sum) + */ + if (sum) + sum = ~sum; + + return (__force __be16)sum; } -const char *ipx_frame_name(unsigned short frame) +const char *ipx_frame_name(__be16 frame) { char* rc = "None"; @@ -1401,7 +1408,7 @@ out: /* caller must hold a reference to intrfc */ -static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) +static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc) { unsigned short socketNum = intrfc->if_sknum; @@ -1410,7 +1417,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) if (socketNum < IPX_MIN_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; - while (__ipxitf_find_socket(intrfc, ntohs(socketNum))) + while (__ipxitf_find_socket(intrfc, htons(socketNum))) if (socketNum > IPX_MAX_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; else @@ -1419,7 +1426,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) spin_unlock_bh(&intrfc->if_sklist_lock); intrfc->if_sknum = socketNum; - return ntohs(socketNum); + return htons(socketNum); } static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -1473,7 +1480,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ipxs->port)) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", - ntohs((int)addr->sipx_port)); + ntohs(addr->sipx_port)); goto out_put; } } else { @@ -1488,7 +1495,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ipxitf_find_socket(intrfc, addr->sipx_port)) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", - ntohs((int)addr->sipx_port)); + ntohs(addr->sipx_port)); goto out_put; } } @@ -1665,7 +1672,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty intrfc = ipxitf_find_using_phys(dev, pt->type); if (!intrfc) { if (ipxcfg_auto_create_interfaces && - ntohl(IPX_SKB_CB(skb)->ipx_dest_net)) { + IPX_SKB_CB(skb)->ipx_dest_net) { intrfc = ipxitf_auto_create(dev, pt->type); if (intrfc) ipxitf_hold(intrfc); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 4c0c71206e5..b7463dfca63 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -260,22 +260,22 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) ipxs = ipx_sk(s); #ifdef CONFIG_IPX_INTERN seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", - (unsigned long)htonl(ipxs->intrfc->if_netnum), + (unsigned long)ntohl(ipxs->intrfc->if_netnum), ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3], - ipxs->node[4], ipxs->node[5], htons(ipxs->port)); + ipxs->node[4], ipxs->node[5], ntohs(ipxs->port)); #else - seq_printf(seq, "%08lX:%04X ", (unsigned long) htonl(ipxs->intrfc->if_netnum), - htons(ipxs->port)); + seq_printf(seq, "%08lX:%04X ", (unsigned long) ntohl(ipxs->intrfc->if_netnum), + ntohs(ipxs->port)); #endif /* CONFIG_IPX_INTERN */ if (s->sk_state != TCP_ESTABLISHED) seq_printf(seq, "%-28s", "Not_Connected"); else { seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", - (unsigned long)htonl(ipxs->dest_addr.net), + (unsigned long)ntohl(ipxs->dest_addr.net), ipxs->dest_addr.node[0], ipxs->dest_addr.node[1], ipxs->dest_addr.node[2], ipxs->dest_addr.node[3], ipxs->dest_addr.node[4], ipxs->dest_addr.node[5], - htons(ipxs->dest_addr.sock)); + ntohs(ipxs->dest_addr.sock)); } seq_printf(seq, "%08X %08X %02X %03d\n", diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index a30dbb1e08f..68560ee0d79 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -19,17 +19,17 @@ DEFINE_RWLOCK(ipx_routes_lock); extern struct ipx_interface *ipx_internal_net; -extern __u16 ipx_cksum(struct ipxhdr *packet, int length); -extern struct ipx_interface *ipxitf_find_using_net(__u32 net); +extern __be16 ipx_cksum(struct ipxhdr *packet, int length); +extern struct ipx_interface *ipxitf_find_using_net(__be32 net); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node); -extern struct ipx_interface *ipxitf_find_using_net(__u32 net); +extern struct ipx_interface *ipxitf_find_using_net(__be32 net); -struct ipx_route *ipxrtr_lookup(__u32 net) +struct ipx_route *ipxrtr_lookup(__be32 net) { struct ipx_route *r; @@ -48,7 +48,7 @@ unlock: /* * Caller must hold a reference to intrfc */ -int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc, +int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, unsigned char *node) { struct ipx_route *rt; @@ -118,7 +118,7 @@ out: return rc; } -static int ipxrtr_delete(__u32 net) +static int ipxrtr_delete(__be32 net) { struct ipx_route *r, *tmp; int rc; @@ -238,7 +238,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, /* Apply checksum. Not allowed on 802.3 links. */ if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) - ipx->ipx_checksum = 0xFFFF; + ipx->ipx_checksum = htons(0xFFFF); else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); diff --git a/net/irda/discovery.c b/net/irda/discovery.c index 3fefc822c1c..89fd2a2cbca 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/socket.h> +#include <linux/fs.h> #include <linux/seq_file.h> #include <net/irda/irda.h> diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index d50a02030ad..262bda808d9 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -61,7 +61,7 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty); static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch); static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout); static void ircomm_tty_hangup(struct tty_struct *tty); -static void ircomm_tty_do_softint(void *private_); +static void ircomm_tty_do_softint(struct work_struct *work); static void ircomm_tty_shutdown(struct ircomm_tty_cb *self); static void ircomm_tty_stop(struct tty_struct *tty); @@ -389,7 +389,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self->flow = FLOW_STOP; self->line = line; - INIT_WORK(&self->tqueue, ircomm_tty_do_softint, self); + INIT_WORK(&self->tqueue, ircomm_tty_do_softint); self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED; self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED; self->close_delay = 5*HZ/10; @@ -594,15 +594,16 @@ static void ircomm_tty_flush_buffer(struct tty_struct *tty) } /* - * Function ircomm_tty_do_softint (private_) + * Function ircomm_tty_do_softint (work) * * We use this routine to give the write wakeup to the user at at a * safe time (as fast as possible after write have completed). This * can be compared to the Tx interrupt. */ -static void ircomm_tty_do_softint(void *private_) +static void ircomm_tty_do_softint(struct work_struct *work) { - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) private_; + struct ircomm_tty_cb *self = + container_of(work, struct ircomm_tty_cb, tqueue); struct tty_struct *tty; unsigned long flags; struct sk_buff *skb, *ctrl_skb; diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 197e3e7ed7e..75e39ea599d 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -146,7 +146,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) * do something rational. */ void ircomm_tty_set_termios(struct tty_struct *tty, - struct termios *old_termios) + struct ktermios *old_termios) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; unsigned int cflag = tty->termios->c_cflag; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 415cf4eec23..8f1c6d65b24 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/skbuff.h> +#include <linux/fs.h> #include <linux/string.h> #include <linux/init.h> #include <linux/seq_file.h> @@ -172,7 +173,7 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - self = kmalloc(sizeof(struct iriap_cb), GFP_ATOMIC); + self = kzalloc(sizeof(*self), GFP_ATOMIC); if (!self) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); return NULL; @@ -181,7 +182,6 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, /* * Initialize instance */ - memset(self, 0, sizeof(struct iriap_cb)); self->magic = IAS_MAGIC; self->mode = mode; @@ -451,12 +451,12 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, n = 2; /* Get length, MSB first */ - len = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2; + len = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2; IRDA_DEBUG(4, "%s(), len=%d\n", __FUNCTION__, len); /* Get object ID, MSB first */ - obj_id = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2; + obj_id = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2; type = fp[n++]; IRDA_DEBUG(4, "%s(), Value type = %d\n", __FUNCTION__, type); @@ -506,7 +506,7 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, value = irias_new_string_value(fp+n); break; case IAS_OCT_SEQ: - value_len = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); + value_len = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2; /* Will truncate to IAS_MAX_OCTET_STRING bytes */ @@ -544,7 +544,7 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self, { struct sk_buff *tx_skb; int n; - __u32 tmp_be32; + __be32 tmp_be32; __be16 tmp_be16; __u8 *fp; diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 56292ab7d65..b1ee99a59c0 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -501,13 +501,12 @@ struct ias_value *irias_new_octseq_value(__u8 *octseq , int len) len = IAS_MAX_OCTET_STRING; value->len = len; - value->t.oct_seq = kmalloc(len, GFP_ATOMIC); + value->t.oct_seq = kmemdup(octseq, len, GFP_ATOMIC); if (value->t.oct_seq == NULL){ IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); kfree(value); return NULL; } - memcpy(value->t.oct_seq, octseq , len); return value; } @@ -522,7 +521,6 @@ struct ias_value *irias_new_missing_value(void) } value->type = IAS_MISSING; - value->len = 0; return value; } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 9b962f24771..2bb04ac0932 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -995,7 +995,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, { __u8 *frame; __u8 param_len; - __u16 tmp_le; /* Temporary value in little endian format */ + __le16 tmp_le; /* Temporary value in little endian format */ int n=0; if (skb == NULL) { diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 5073261b9d0..7e5d12ab3b9 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -641,15 +641,13 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance) } /* Allocate a new instance */ - new = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC); + new = kmemdup(orig, sizeof(*new), GFP_ATOMIC); if (!new) { IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __FUNCTION__); spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags); return NULL; } - /* Dup */ - memcpy(new, orig, sizeof(struct lsap_cb)); /* new->lap = orig->lap; => done in the memcpy() */ /* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */ new->conn_skb = NULL; @@ -1678,7 +1676,8 @@ static int irlmp_slsap_inuse(__u8 slsap_sel) * every IrLAP connection and check every LSAP associated with each * the connection. */ - spin_lock_irqsave(&irlmp->links->hb_spinlock, flags); + spin_lock_irqsave_nested(&irlmp->links->hb_spinlock, flags, + SINGLE_DEPTH_NESTING); lap = (struct lap_cb *) hashbin_get_first(irlmp->links); while (lap != NULL) { IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;); diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index 1ba8c710663..1d26cd33ea1 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -356,14 +356,13 @@ hashbin_t *hashbin_new(int type) /* * Allocate new hashbin */ - hashbin = kmalloc( sizeof(hashbin_t), GFP_ATOMIC); + hashbin = kzalloc(sizeof(*hashbin), GFP_ATOMIC); if (!hashbin) return NULL; /* * Initialize structure */ - memset(hashbin, 0, sizeof(hashbin_t)); hashbin->hb_type = type; hashbin->magic = HB_MAGIC; //hashbin->hb_current = NULL; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 3c2e70b77df..03504f3e499 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -26,6 +26,7 @@ #include <linux/skbuff.h> #include <linux/init.h> +#include <linux/fs.h> #include <linux/seq_file.h> #include <asm/byteorder.h> @@ -1099,7 +1100,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, return -ENOMEM; /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); } else { tx_skb = userdata; /* @@ -1147,7 +1148,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, frame[3] = 0x02; /* Value length */ put_unaligned(cpu_to_be16((__u16) max_sdu_size), - (__u16 *)(frame+4)); + (__be16 *)(frame+4)); } else { /* Insert plain TTP header */ frame = skb_push(tx_skb, TTP_HEADER); @@ -1348,7 +1349,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, return -ENOMEM; /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); } else { tx_skb = userdata; /* @@ -1394,7 +1395,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, frame[3] = 0x02; /* Value length */ put_unaligned(cpu_to_be16((__u16) max_sdu_size), - (__u16 *)(frame+4)); + (__be16 *)(frame+4)); } else { /* Insert TTP header */ frame = skb_push(tx_skb, TTP_HEADER); diff --git a/net/key/af_key.c b/net/key/af_key.c index 20ff7cca1d0..5dd5094659a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <net/xfrm.h> +#include <linux/audit.h> #include <net/sock.h> @@ -1420,6 +1421,9 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, else err = xfrm_state_update(x); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -1460,8 +1464,12 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = -EPERM; goto out; } - + err = xfrm_state_delete(x); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -1637,12 +1645,15 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd { unsigned proto; struct km_event c; + struct xfrm_audit audit_info; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - xfrm_state_flush(proto); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_state_flush(proto, &audit_info); c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -1767,11 +1778,11 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - switch (xp->family) { + struct sockaddr *sa; + sa = (struct sockaddr *)(rq+1); + switch(sa->sa_family) { case AF_INET: - sin = (void*)(rq+1); - if (sin->sin_family != AF_INET) - return -EINVAL; + sin = (struct sockaddr_in*)sa; t->saddr.a4 = sin->sin_addr.s_addr; sin++; if (sin->sin_family != AF_INET) @@ -1780,9 +1791,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - sin6 = (void *)(rq+1); - if (sin6->sin6_family != AF_INET6) - return -EINVAL; + sin6 = (struct sockaddr_in6*)sa; memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr)); sin6++; if (sin6->sin6_family != AF_INET6) @@ -1793,7 +1802,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) default: return -EINVAL; } - } + t->encap_family = sa->sa_family; + } else + t->encap_family = xp->family; + /* No way to set this via kame pfkey */ t->aalgos = t->ealgos = t->calgos = ~0; xp->xfrm_nr++; @@ -1830,18 +1842,25 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) { + struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); - int socklen = (xp->family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + int socklen = 0; + int i; + + for (i=0; i<xp->xfrm_nr; i++) { + t = xp->xfrm_vec + i; + socklen += (t->encap_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + } return sizeof(struct sadb_msg) + (sizeof(struct sadb_lifetime) * 3) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + - (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + - (socklen * 2))) + + (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) + + (socklen * 2) + pfkey_xfrm_policy2sec_ctx_size(xp); } @@ -1999,7 +2018,9 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i req_size = sizeof(struct sadb_x_ipsecrequest); if (t->mode == XFRM_MODE_TUNNEL) - req_size += 2*socklen; + req_size += ((t->encap_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)) * 2); else size -= 2*socklen; rq = (void*)skb_put(skb, req_size); @@ -2015,7 +2036,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; if (t->mode == XFRM_MODE_TUNNEL) { - switch (xp->family) { + switch (t->encap_family) { case AF_INET: sin = (void*)(rq+1); sin->sin_family = AF_INET; @@ -2195,6 +2216,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL); + if (err) goto out; @@ -2272,6 +2296,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -2406,8 +2434,11 @@ static int key_notify_policy_flush(struct km_event *c) static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { struct km_event c; + struct xfrm_audit audit_info; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; @@ -2938,7 +2969,7 @@ out: return NULL; } -static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport) +static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct sk_buff *skb; struct sadb_msg *hdr; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2652ead96c6..190bb3e0518 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -64,7 +64,7 @@ static inline u16 llc_ui_next_link_no(int sap) * * Given an ARP header type return the corresponding ethernet protocol. */ -static inline u16 llc_proto_type(u16 arphrd) +static inline __be16 llc_proto_type(u16 arphrd) { return arphrd == ARPHRD_IEEE802_TR ? htons(ETH_P_TR_802_2) : htons(ETH_P_802_2); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 94d2368ade9..db82aff6e40 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -115,8 +115,8 @@ static inline int llc_fixup_skb(struct sk_buff *skb) skb->h.raw += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { - u16 pdulen = eth_hdr(skb)->h_proto, - data_size = ntohs(pdulen) - llc_len; + __be16 pdulen = eth_hdr(skb)->h_proto; + u16 data_size = ntohs(pdulen) - llc_len; if (unlikely(pskb_trim_rcsum(skb, data_size))) return 0; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f619c652726..1b853c34d30 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,5 +1,5 @@ menu "Core Netfilter Configuration" - depends on NET && NETFILTER + depends on NET && INET && NETFILTER config NETFILTER_NETLINK tristate "Netfilter netlink interface" @@ -25,19 +25,57 @@ config NETFILTER_NETLINK_LOG and is also scheduled to replace the old syslog-based ipt_LOG and ip6t_LOG modules. -config NF_CONNTRACK - tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL && IP_NF_CONNTRACK=n - default n - ---help--- +config NF_CONNTRACK_ENABLED + tristate "Netfilter connection tracking support" + help Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related into connections. + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + + To compile it as a module, choose M here. If unsure, say N. + +choice + prompt "Netfilter connection tracking support" + depends on NF_CONNTRACK_ENABLED + +config NF_CONNTRACK_SUPPORT + bool "Layer 3 Independent Connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL + help Layer 3 independent connection tracking is experimental scheme which generalize ip_conntrack to support other layer 3 protocols. - To compile it as a module, choose M here. If unsure, say N. + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + +config IP_NF_CONNTRACK_SUPPORT + bool "Layer 3 Dependent Connection tracking (OBSOLETE)" + help + The old, Layer 3 dependent ip_conntrack subsystem of netfilter. + + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + +endchoice + +config NF_CONNTRACK + tristate + default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m + default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y + +config IP_NF_CONNTRACK + tristate + default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m + default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y config NF_CT_ACCT bool "Connection tracking flow accounting" @@ -82,8 +120,12 @@ config NF_CONNTRACK_EVENTS If unsure, say `N'. +config NF_CT_PROTO_GRE + tristate + depends on EXPERIMENTAL && NF_CONNTRACK + config NF_CT_PROTO_SCTP - tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)' + tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK default n help @@ -93,8 +135,23 @@ config NF_CT_PROTO_SCTP If you want to compile it as a module, say M here and read Documentation/modules.txt. If unsure, say `N'. +config NF_CONNTRACK_AMANDA + tristate "Amanda backup protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + select TEXTSEARCH + select TEXTSEARCH_KMP + help + If you are running the Amanda backup package <http://www.amanda.org/> + on this machine or machines that will be MASQUERADED through this + machine, then you may want to enable this feature. This allows the + connection tracking and natting code to allow the sub-channels that + Amanda requires for communication of the backup data, messages and + index. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CONNTRACK_FTP - tristate "FTP support on new connection tracking (EXPERIMENTAL)" + tristate "FTP protocol support (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK help Tracking FTP connections is problematic: special helpers are @@ -107,6 +164,101 @@ config NF_CONNTRACK_FTP To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_H323 + tristate "H.323 protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + H.323 is a VoIP signalling protocol from ITU-T. As one of the most + important VoIP protocols, it is widely used by voice hardware and + software including voice gateways, IP phones, Netmeeting, OpenPhone, + Gnomemeeting, etc. + + With this module you can support H.323 on a connection tracking/NAT + firewall. + + This module supports RAS, Fast Start, H.245 Tunnelling, Call + Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat, + whiteboard, file transfer, etc. For more information, please + visit http://nath323.sourceforge.net/. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_IRC + tristate "IRC protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + There is a commonly-used extension to IRC called + Direct Client-to-Client Protocol (DCC). This enables users to send + files to each other, and also chat to each other without the need + of a server. DCC Sending is used anywhere you send files over IRC, + and DCC Chat is most commonly used by Eggdrop bots. If you are + using NAT, this extension will enable you to send files and initiate + chats. Note that you do NOT need this extension to get files or + have others initiate chats, or everything else in IRC. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_NETBIOS_NS + tristate "NetBIOS name service protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + NetBIOS name service requests are sent as broadcast messages from an + unprivileged port and responded to with unicast messages to the + same port. This make them hard to firewall properly because connection + tracking doesn't deal with broadcasts. This helper tracks locally + originating NetBIOS name service requests and the corresponding + responses. It relies on correct IP address configuration, specifically + netmask and broadcast address. When properly configured, the output + of "ip address show" should look similar to this: + + $ ip -4 address show eth0 + 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000 + inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0 + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_PPTP + tristate "PPtP protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + select NF_CT_PROTO_GRE + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) connection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + Specifically these limitations exist: + - Blindy assumes that control connections are always established + in PNS->PAC direction. This is a violation of RFC2637. + - Only supports a single call within each session + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_SIP + tristate "SIP protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + SIP is an application-layer control protocol that can establish, + modify, and terminate multimedia sessions (conferences) such as + Internet telephony calls. With the ip_conntrack_sip and + the nf_nat_sip modules you can support the protocol on a connection + tracking/NATing firewall. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_TFTP + tristate "TFTP protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + TFTP connection tracking helper, this is required depending + on how restrictive your ruleset is. + If you are using a tftp client behind -j SNAT or -j MASQUERADING + you will need this. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CT_NETLINK tristate 'Connection tracking netlink interface (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK @@ -184,6 +336,17 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NFLOG + tristate '"NFLOG" target support' + depends on NETFILTER_XTABLES + help + This option enables the NFLOG target, which allows to LOG + messages through the netfilter logging API, which can use + either the old LOG target, the old ULOG target or nfnetlink_log + as backend. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NOTRACK tristate '"NOTRACK" target support' depends on NETFILTER_XTABLES @@ -464,5 +627,19 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_HASHLIMIT + tristate '"hashlimit" match support' + depends on NETFILTER_XTABLES + help + This option adds a `hashlimit' match. + + As opposed to `limit', this match dynamically creates a hash table + of limit buckets, based on your selection of source/destination + addresses and/or ports. + + It enables you to express policies like `10kpps for any given + destination address' or `500pps from any given source address' + with a single rule. + endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a74be492fd0..5dc5574f7e9 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,7 +1,10 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o + +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o +obj-$(CONFIG_SYSCTL) += nf_sysctl.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o @@ -11,13 +14,23 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o # SCTP protocol connection tracking +obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o # connection tracking helpers +nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o + +obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o +obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o +obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o +obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o +obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o +obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o +obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o @@ -28,6 +41,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o @@ -56,3 +70,4 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index d80b935b3a9..291b8c6862f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -28,7 +28,7 @@ static DEFINE_SPINLOCK(afinfo_lock); -struct nf_afinfo *nf_afinfo[NPROTO]; +struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(struct nf_afinfo *afinfo) @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); * of skbuffs queued for userspace, and not deregister a hook unless * this is zero, but that sucks. Now, we simply check when the * packets come back: if the hook is gone, the packet is discarded. */ -struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); static DEFINE_SPINLOCK(nf_hook_lock); @@ -222,28 +222,21 @@ copy_skb: } EXPORT_SYMBOL(skb_make_writable); -u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) -{ - u_int32_t diff[] = { oldval, newval }; - - return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); -} -EXPORT_SYMBOL(nf_csum_update); - -u_int16_t nf_proto_csum_update(struct sk_buff *skb, - u_int32_t oldval, u_int32_t newval, - u_int16_t csum, int pseudohdr) +void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) { + __be32 diff[] = { ~from, to }; if (skb->ip_summed != CHECKSUM_PARTIAL) { - csum = nf_csum_update(oldval, newval, csum); + *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + ~csum_unfold(*sum))); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = nf_csum_update(oldval, newval, skb->csum); + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); } else if (pseudohdr) - csum = ~nf_csum_update(oldval, newval, ~csum); - - return csum; + *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + csum_unfold(*sum))); } -EXPORT_SYMBOL(nf_proto_csum_update); +EXPORT_SYMBOL(nf_proto_csum_replace4); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c new file mode 100644 index 00000000000..b8869eab765 --- /dev/null +++ b/net/netfilter/nf_conntrack_amanda.c @@ -0,0 +1,238 @@ +/* Amanda extension for IP connection tracking + * + * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> + * based on HW's ip_conntrack_irc.c as well as other modules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/textsearch.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_amanda.h> + +static unsigned int master_timeout __read_mostly = 300; +static char *ts_algo = "kmp"; + +MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); +MODULE_DESCRIPTION("Amanda connection tracking module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_conntrack_amanda"); + +module_param(master_timeout, uint, 0600); +MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); +module_param(ts_algo, charp, 0400); +MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); + +unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) + __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); + +enum amanda_strings { + SEARCH_CONNECT, + SEARCH_NEWLINE, + SEARCH_DATA, + SEARCH_MESG, + SEARCH_INDEX, +}; + +static struct { + char *string; + size_t len; + struct ts_config *ts; +} search[] __read_mostly = { + [SEARCH_CONNECT] = { + .string = "CONNECT ", + .len = 8, + }, + [SEARCH_NEWLINE] = { + .string = "\n", + .len = 1, + }, + [SEARCH_DATA] = { + .string = "DATA ", + .len = 5, + }, + [SEARCH_MESG] = { + .string = "MESG ", + .len = 5, + }, + [SEARCH_INDEX] = { + .string = "INDEX ", + .len = 6, + }, +}; + +static int amanda_help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct ts_state ts; + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple *tuple; + unsigned int dataoff, start, stop, off, i; + char pbuf[sizeof("65535")], *tmp; + u_int16_t len; + __be16 port; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int ret = NF_ACCEPT; + typeof(nf_nat_amanda_hook) nf_nat_amanda; + + /* Only look at packets from the Amanda server */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* increase the UDP timeout of the master connection as replies from + * Amanda clients to the server can be quite delayed */ + nf_ct_refresh(ct, *pskb, master_timeout * HZ); + + /* No data? */ + dataoff = protoff + sizeof(struct udphdr); + if (dataoff >= (*pskb)->len) { + if (net_ratelimit()) + printk("amanda_help: skblen = %u\n", (*pskb)->len); + return NF_ACCEPT; + } + + memset(&ts, 0, sizeof(ts)); + start = skb_find_text(*pskb, dataoff, (*pskb)->len, + search[SEARCH_CONNECT].ts, &ts); + if (start == UINT_MAX) + goto out; + start += dataoff + search[SEARCH_CONNECT].len; + + memset(&ts, 0, sizeof(ts)); + stop = skb_find_text(*pskb, start, (*pskb)->len, + search[SEARCH_NEWLINE].ts, &ts); + if (stop == UINT_MAX) + goto out; + stop += start; + + for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { + memset(&ts, 0, sizeof(ts)); + off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); + if (off == UINT_MAX) + continue; + off += start + search[i].len; + + len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); + if (skb_copy_bits(*pskb, off, pbuf, len)) + break; + pbuf[len] = '\0'; + + port = htons(simple_strtoul(pbuf, &tmp, 10)); + len = tmp - pbuf; + if (port == 0 || len > 5) + break; + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + nf_conntrack_expect_init(exp, family, + &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); + + nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); + if (nf_nat_amanda && ct->status & IPS_NAT_MASK) + ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, + len, exp); + else if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + nf_conntrack_expect_put(exp); + } + +out: + return ret; +} + +static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { + { + .name = "amanda", + .max_expected = 3, + .timeout = 180, + .me = THIS_MODULE, + .help = amanda_help, + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = __constant_htons(10080), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + }, + { + .name = "amanda", + .max_expected = 3, + .timeout = 180, + .me = THIS_MODULE, + .help = amanda_help, + .tuple.src.l3num = AF_INET6, + .tuple.src.u.udp.port = __constant_htons(10080), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + }, +}; + +static void __exit nf_conntrack_amanda_fini(void) +{ + int i; + + nf_conntrack_helper_unregister(&amanda_helper[0]); + nf_conntrack_helper_unregister(&amanda_helper[1]); + for (i = 0; i < ARRAY_SIZE(search); i++) + textsearch_destroy(search[i].ts); +} + +static int __init nf_conntrack_amanda_init(void) +{ + int ret, i; + + ret = -ENOMEM; + for (i = 0; i < ARRAY_SIZE(search); i++) { + search[i].ts = textsearch_prepare(ts_algo, search[i].string, + search[i].len, + GFP_KERNEL, TS_AUTOLOAD); + if (search[i].ts == NULL) + goto err1; + } + ret = nf_conntrack_helper_register(&amanda_helper[0]); + if (ret < 0) + goto err1; + ret = nf_conntrack_helper_register(&amanda_helper[1]); + if (ret < 0) + goto err2; + return 0; + +err2: + nf_conntrack_helper_unregister(&amanda_helper[0]); +err1: + for (; i >= 0; i--) { + if (search[i].ts) + textsearch_destroy(search[i].ts); + } + return ret; +} + +module_init(nf_conntrack_amanda_init); +module_exit(nf_conntrack_amanda_fini); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 093b3ddc513..9b02ec4012f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -46,15 +46,12 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/socket.h> - -/* This rwlock protects the main hash table, protocol/helper/expected - registrations, conntrack timers*/ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) +#include <linux/mm.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> @@ -67,92 +64,32 @@ #endif DEFINE_RWLOCK(nf_conntrack_lock); +EXPORT_SYMBOL_GPL(nf_conntrack_lock); /* nf_conntrack_standalone needs this */ atomic_t nf_conntrack_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(nf_conntrack_count); -void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; -LIST_HEAD(nf_conntrack_expect_list); -struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly; -static LIST_HEAD(helpers); -unsigned int nf_conntrack_htable_size __read_mostly = 0; -int nf_conntrack_max __read_mostly; -struct list_head *nf_conntrack_hash __read_mostly; -static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; -struct nf_conn nf_conntrack_untracked; -unsigned int nf_ct_log_invalid __read_mostly; -static LIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; - -static unsigned int nf_conntrack_next_id; -static unsigned int nf_conntrack_expect_next_id; -#ifdef CONFIG_NF_CONNTRACK_EVENTS -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain); +void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); +EXPORT_SYMBOL_GPL(nf_conntrack_destroyed); -DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); +unsigned int nf_conntrack_htable_size __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); -/* deliver cached events and clear cache entry - must be called with locally - * disabled softirqs */ -static inline void -__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) -{ - DEBUGP("ecache: delivering events for %p\n", ecache->ct); - if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) - && ecache->events) - atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, - ecache->ct); - - ecache->events = 0; - nf_ct_put(ecache->ct); - ecache->ct = NULL; -} +int nf_conntrack_max __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_max); -/* Deliver all cached events for a particular conntrack. This is called - * by code prior to async packet handling for freeing the skb */ -void nf_ct_deliver_cached_events(const struct nf_conn *ct) -{ - struct nf_conntrack_ecache *ecache; +struct list_head *nf_conntrack_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash); - local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); - if (ecache->ct == ct) - __nf_ct_deliver_cached_events(ecache); - local_bh_enable(); -} +struct nf_conn nf_conntrack_untracked __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -/* Deliver cached events for old pending events, if current conntrack != old */ -void __nf_ct_event_cache_init(struct nf_conn *ct) -{ - struct nf_conntrack_ecache *ecache; - - /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(nf_conntrack_ecache); - BUG_ON(ecache->ct == ct); - if (ecache->ct) - __nf_ct_deliver_cached_events(ecache); - /* initialize for this conntrack/packet */ - ecache->ct = ct; - nf_conntrack_get(&ct->ct_general); -} - -/* flush the event cache - touches other CPU's data and must not be called - * while packets are still passing through the code */ -static void nf_ct_event_cache_flush(void) -{ - struct nf_conntrack_ecache *ecache; - int cpu; +unsigned int nf_ct_log_invalid __read_mostly; +LIST_HEAD(unconfirmed); +static int nf_conntrack_vmalloc __read_mostly; - for_each_possible_cpu(cpu) { - ecache = &per_cpu(nf_conntrack_ecache, cpu); - if (ecache->ct) - nf_ct_put(ecache->ct); - } -} -#else -static inline void nf_ct_event_cache_flush(void) {} -#endif /* CONFIG_NF_CONNTRACK_EVENTS */ +static unsigned int nf_conntrack_next_id; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); @@ -171,7 +108,7 @@ static struct { size_t size; /* slab cache pointer */ - kmem_cache_t *cachep; + struct kmem_cache *cachep; /* allocated slab cache + modules which uses this slab cache */ int use; @@ -184,85 +121,6 @@ DEFINE_RWLOCK(nf_ct_cache_lock); /* This avoids calling kmem_cache_create() with same name simultaneously */ static DEFINE_MUTEX(nf_ct_cache_mutex); -extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; -struct nf_conntrack_protocol * -__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol) -{ - if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) - return &nf_conntrack_generic_protocol; - - return nf_ct_protos[l3proto][protocol]; -} - -/* this is guaranteed to always return a valid protocol helper, since - * it falls back to generic_protocol */ -struct nf_conntrack_protocol * -nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol) -{ - struct nf_conntrack_protocol *p; - - preempt_disable(); - p = __nf_ct_proto_find(l3proto, protocol); - if (!try_module_get(p->me)) - p = &nf_conntrack_generic_protocol; - preempt_enable(); - - return p; -} - -void nf_ct_proto_put(struct nf_conntrack_protocol *p) -{ - module_put(p->me); -} - -struct nf_conntrack_l3proto * -nf_ct_l3proto_find_get(u_int16_t l3proto) -{ - struct nf_conntrack_l3proto *p; - - preempt_disable(); - p = __nf_ct_l3proto_find(l3proto); - if (!try_module_get(p->me)) - p = &nf_conntrack_generic_l3proto; - preempt_enable(); - - return p; -} - -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) -{ - module_put(p->me); -} - -int -nf_ct_l3proto_try_module_get(unsigned short l3proto) -{ - int ret; - struct nf_conntrack_l3proto *p; - -retry: p = nf_ct_l3proto_find_get(l3proto); - if (p == &nf_conntrack_generic_l3proto) { - ret = request_module("nf_conntrack-%d", l3proto); - if (!ret) - goto retry; - - return -EPROTOTYPE; - } - - return 0; -} - -void nf_ct_l3proto_module_put(unsigned short l3proto) -{ - struct nf_conntrack_l3proto *p; - - preempt_disable(); - p = __nf_ct_l3proto_find(l3proto); - preempt_enable(); - - module_put(p->me); -} - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -289,7 +147,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, { int ret = 0; char *cache_name; - kmem_cache_t *cachep; + struct kmem_cache *cachep; DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", features, name, size); @@ -363,11 +221,12 @@ out_up_mutex: mutex_unlock(&nf_ct_cache_mutex); return ret; } +EXPORT_SYMBOL_GPL(nf_conntrack_register_cache); /* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ void nf_conntrack_unregister_cache(u_int32_t features) { - kmem_cache_t *cachep; + struct kmem_cache *cachep; char *name; /* @@ -397,6 +256,7 @@ void nf_conntrack_unregister_cache(u_int32_t features) mutex_unlock(&nf_ct_cache_mutex); } +EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache); int nf_ct_get_tuple(const struct sk_buff *skb, @@ -406,7 +266,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, u_int8_t protonum, struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, - const struct nf_conntrack_protocol *protocol) + const struct nf_conntrack_l4proto *l4proto) { NF_CT_TUPLE_U_BLANK(tuple); @@ -417,14 +277,15 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; - return protocol->pkt_to_tuple(skb, dataoff, tuple); + return l4proto->pkt_to_tuple(skb, dataoff, tuple); } +EXPORT_SYMBOL_GPL(nf_ct_get_tuple); int nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_l3proto *l3proto, - const struct nf_conntrack_protocol *protocol) + const struct nf_conntrack_l4proto *l4proto) { NF_CT_TUPLE_U_BLANK(inverse); @@ -435,111 +296,14 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, inverse->dst.dir = !orig->dst.dir; inverse->dst.protonum = orig->dst.protonum; - return protocol->invert_tuple(inverse, orig); -} - -/* nf_conntrack_expect helper functions */ -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) -{ - struct nf_conn_help *master_help = nfct_help(exp->master); - - NF_CT_ASSERT(master_help); - ASSERT_WRITE_LOCK(&nf_conntrack_lock); - NF_CT_ASSERT(!timer_pending(&exp->timeout)); - - list_del(&exp->list); - NF_CT_STAT_INC(expect_delete); - master_help->expecting--; - nf_conntrack_expect_put(exp); -} - -static void expectation_timed_out(unsigned long ul_expect) -{ - struct nf_conntrack_expect *exp = (void *)ul_expect; - - write_lock_bh(&nf_conntrack_lock); - nf_ct_unlink_expect(exp); - write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(exp); -} - -struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_expect *i; - - list_for_each_entry(i, &nf_conntrack_expect_list, list) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { - atomic_inc(&i->use); - return i; - } - } - return NULL; -} - -/* Just find a expectation corresponding to a tuple. */ -struct nf_conntrack_expect * -nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_expect *i; - - read_lock_bh(&nf_conntrack_lock); - i = __nf_conntrack_expect_find(tuple); - read_unlock_bh(&nf_conntrack_lock); - - return i; -} - -/* If an expectation for this connection is found, it gets delete from - * global list then returned. */ -static struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_expect *i; - - list_for_each_entry(i, &nf_conntrack_expect_list, list) { - /* If master is not in hash table yet (ie. packet hasn't left - this machine yet), how can other end know about expected? - Hence these are not the droids you are looking for (if - master ct never got confirmed, we'd hold a reference to it - and weird things would happen to future packets). */ - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) - && nf_ct_is_confirmed(i->master)) { - if (i->flags & NF_CT_EXPECT_PERMANENT) { - atomic_inc(&i->use); - return i; - } else if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - return i; - } - } - } - return NULL; -} - -/* delete all expectations for this conntrack */ -void nf_ct_remove_expectations(struct nf_conn *ct) -{ - struct nf_conntrack_expect *i, *tmp; - struct nf_conn_help *help = nfct_help(ct); - - /* Optimization: most connection never expect any others. */ - if (!help || help->expecting == 0) - return; - - list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { - if (i->master == ct && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); - } - } + return l4proto->invert_tuple(inverse, orig); } +EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); static void clean_from_lists(struct nf_conn *ct) { DEBUGP("clean_from_lists(%p)\n", ct); - ASSERT_WRITE_LOCK(&nf_conntrack_lock); list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); @@ -551,8 +315,9 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; + struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; DEBUGP("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); @@ -561,6 +326,9 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + if (help && help->helper && help->helper->destroy) + help->helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to nf_conntrack_lock!!! -HW */ @@ -568,9 +336,9 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l3proto && l3proto->destroy) l3proto->destroy(ct); - proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); - if (proto && proto->destroy) - proto->destroy(ct); + l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + if (l4proto && l4proto->destroy) + l4proto->destroy(ct); if (nf_conntrack_destroyed) nf_conntrack_destroyed(ct); @@ -618,7 +386,6 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); - ASSERT_READ_LOCK(&nf_conntrack_lock); list_for_each_entry(h, &nf_conntrack_hash[hash], list) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { @@ -630,6 +397,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, return NULL; } +EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * @@ -646,6 +414,7 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, return h; } +EXPORT_SYMBOL_GPL(nf_conntrack_find_get); static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, @@ -669,6 +438,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) __nf_conntrack_hash_insert(ct, hash, repl_hash); write_unlock_bh(&nf_conntrack_lock); } +EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); /* Confirm a connection given skb; places it in hash table */ int @@ -746,6 +516,7 @@ out: write_unlock_bh(&nf_conntrack_lock); return NF_DROP; } +EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); /* Returns true if a connection correspondings to the tuple (required for NAT). */ @@ -761,6 +532,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, return h != NULL; } +EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ @@ -794,53 +566,13 @@ static int early_drop(struct list_head *chain) return dropped; } -static struct nf_conntrack_helper * -__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_helper *h; - - list_for_each_entry(h, &helpers, list) { - if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) - return h; - } - return NULL; -} - -struct nf_conntrack_helper * -nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_helper *helper; - - /* need nf_conntrack_lock to assure that helper exists until - * try_module_get() is called */ - read_lock_bh(&nf_conntrack_lock); - - helper = __nf_ct_helper_find(tuple); - if (helper) { - /* need to increase module usage count to assure helper will - * not go away while the caller is e.g. busy putting a - * conntrack in the hash that uses the helper */ - if (!try_module_get(helper->me)) - helper = NULL; - } - - read_unlock_bh(&nf_conntrack_lock); - - return helper; -} - -void nf_ct_helper_put(struct nf_conntrack_helper *helper) -{ - module_put(helper->me); -} - static struct nf_conn * __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, - const struct nf_conntrack_l3proto *l3proto) + const struct nf_conntrack_l3proto *l3proto, + u_int32_t features) { struct nf_conn *conntrack = NULL; - u_int32_t features = 0; struct nf_conntrack_helper *helper; if (unlikely(!nf_conntrack_hash_rnd_initted)) { @@ -866,12 +598,13 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } /* find features needed by this conntrack. */ - features = l3proto->get_features(orig); + features |= l3proto->get_features(orig); /* FIXME: protect helper list per RCU */ read_lock_bh(&nf_conntrack_lock); helper = __nf_ct_helper_find(repl); - if (helper) + /* NAT might want to assign a helper later */ + if (helper || features & NF_CT_F_NAT) features |= NF_CT_F_HELP; read_unlock_bh(&nf_conntrack_lock); @@ -893,12 +626,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, memset(conntrack, 0, nf_ct_cache[features].size); conntrack->features = features; - if (helper) { - struct nf_conn_help *help = nfct_help(conntrack); - NF_CT_ASSERT(help); - help->helper = helper; - } - atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; @@ -922,8 +649,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, struct nf_conntrack_l3proto *l3proto; l3proto = __nf_ct_l3proto_find(orig->src.l3num); - return __nf_conntrack_alloc(orig, repl, l3proto); + return __nf_conntrack_alloc(orig, repl, l3proto, 0); } +EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *conntrack) { @@ -934,32 +662,40 @@ void nf_conntrack_free(struct nf_conn *conntrack) kmem_cache_free(nf_ct_cache[features].cachep, conntrack); atomic_dec(&nf_conntrack_count); } +EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * init_conntrack(const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_protocol *protocol, + struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff) { struct nf_conn *conntrack; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; + u_int32_t features = 0; - if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) { + if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { DEBUGP("Can't invert tuple.\n"); return NULL; } - conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto); + read_lock_bh(&nf_conntrack_lock); + exp = __nf_conntrack_expect_find(tuple); + if (exp && exp->helper) + features = NF_CT_F_HELP; + read_unlock_bh(&nf_conntrack_lock); + + conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features); if (conntrack == NULL || IS_ERR(conntrack)) { DEBUGP("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)conntrack; } - if (!protocol->new(conntrack, skb, dataoff)) { + if (!l4proto->new(conntrack, skb, dataoff)) { nf_conntrack_free(conntrack); DEBUGP("init conntrack: can't track with proto module\n"); return NULL; @@ -974,6 +710,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; + if (exp->helper) + nfct_help(conntrack)->helper = exp->helper; #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif @@ -982,8 +720,13 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, #endif nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); - } else + } else { + struct nf_conn_help *help = nfct_help(conntrack); + + if (help) + help->helper = __nf_ct_helper_find(&repl_tuple); NF_CT_STAT_INC(new); + } /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); @@ -1006,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb, u_int16_t l3num, u_int8_t protonum, struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_protocol *proto, + struct nf_conntrack_l4proto *l4proto, int *set_reply, enum ip_conntrack_info *ctinfo) { @@ -1016,7 +759,7 @@ resolve_normal_ct(struct sk_buff *skb, if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), dataoff, l3num, protonum, &tuple, l3proto, - proto)) { + l4proto)) { DEBUGP("resolve_normal_ct: Can't get tuple\n"); return NULL; } @@ -1024,7 +767,7 @@ resolve_normal_ct(struct sk_buff *skb, /* look for tuple match */ h = nf_conntrack_find_get(&tuple, NULL); if (!h) { - h = init_conntrack(&tuple, l3proto, proto, skb, dataoff); + h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -1062,7 +805,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; unsigned int dataoff; u_int8_t protonum; int set_reply = 0; @@ -1080,19 +823,19 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return -ret; } - proto = __nf_ct_proto_find((u_int16_t)pf, protonum); + l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (proto->error != NULL && - (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + if (l4proto->error != NULL && + (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { NF_CT_STAT_INC(error); NF_CT_STAT_INC(invalid); return -ret; } - ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto, + ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ @@ -1108,7 +851,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) NF_CT_ASSERT((*pskb)->nfct); - ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@ -1124,255 +867,38 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return ret; } +EXPORT_SYMBOL_GPL(nf_conntrack_in); int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig) { return nf_ct_invert_tuple(inverse, orig, __nf_ct_l3proto_find(orig->src.l3num), - __nf_ct_proto_find(orig->src.l3num, + __nf_ct_l4proto_find(orig->src.l3num, orig->dst.protonum)); } +EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); -/* Would two expected things clash? */ -static inline int expect_clash(const struct nf_conntrack_expect *a, - const struct nf_conntrack_expect *b) -{ - /* Part covered by intersection of masks must be unequal, - otherwise they clash */ - struct nf_conntrack_tuple intersect_mask; - int count; - - intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; - intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; - intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; - intersect_mask.dst.protonum = a->mask.dst.protonum - & b->mask.dst.protonum; - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - intersect_mask.src.u3.all[count] = - a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; - } - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - intersect_mask.dst.u3.all[count] = - a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; - } - - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); -} - -static inline int expect_matches(const struct nf_conntrack_expect *a, - const struct nf_conntrack_expect *b) -{ - return a->master == b->master - && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_equal(&a->mask, &b->mask); -} - -/* Generally a bad idea to call this: could have matched already. */ -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) -{ - struct nf_conntrack_expect *i; - - write_lock_bh(&nf_conntrack_lock); - /* choose the the oldest expectation to evict */ - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (expect_matches(i, exp) && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(i); - return; - } - } - write_unlock_bh(&nf_conntrack_lock); -} - -/* We don't increase the master conntrack refcount for non-fulfilled - * conntracks. During the conntrack destruction, the expectations are - * always killed before the conntrack itself */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) -{ - struct nf_conntrack_expect *new; - - new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); - if (!new) { - DEBUGP("expect_related: OOM allocating expect\n"); - return NULL; - } - new->master = me; - atomic_set(&new->use, 1); - return new; -} - -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +/* Alter reply tuple (maybe alter helper). This is for NAT, and is + implicitly racy: see __nf_conntrack_confirm */ +void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply) { - if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(nf_conntrack_expect_cachep, exp); -} - -static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) -{ - struct nf_conn_help *master_help = nfct_help(exp->master); - - atomic_inc(&exp->use); - master_help->expecting++; - list_add(&exp->list, &nf_conntrack_expect_list); - - init_timer(&exp->timeout); - exp->timeout.data = (unsigned long)exp; - exp->timeout.function = expectation_timed_out; - exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; - add_timer(&exp->timeout); - - exp->id = ++nf_conntrack_expect_next_id; - atomic_inc(&exp->use); - NF_CT_STAT_INC(expect_create); -} - -/* Race with expectations being used means we could have none to find; OK. */ -static void evict_oldest_expect(struct nf_conn *master) -{ - struct nf_conntrack_expect *i; - - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (i->master == master) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); - } - break; - } - } -} - -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - - if (!del_timer(&i->timeout)) - return 0; - - i->timeout.expires = jiffies + master_help->helper->timeout*HZ; - add_timer(&i->timeout); - return 1; -} - -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) -{ - struct nf_conntrack_expect *i; - struct nf_conn *master = expect->master; - struct nf_conn_help *master_help = nfct_help(master); - int ret; - - NF_CT_ASSERT(master_help); - - DEBUGP("nf_conntrack_expect_related %p\n", related_to); - DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); - DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); - - write_lock_bh(&nf_conntrack_lock); - list_for_each_entry(i, &nf_conntrack_expect_list, list) { - if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; - } - } else if (expect_clash(i, expect)) { - ret = -EBUSY; - goto out; - } - } - /* Will be over limit? */ - if (master_help->helper->max_expected && - master_help->expecting >= master_help->helper->max_expected) - evict_oldest_expect(master); - - nf_conntrack_expect_insert(expect); - nf_conntrack_expect_event(IPEXP_NEW, expect); - ret = 0; -out: - write_unlock_bh(&nf_conntrack_lock); - return ret; -} - -int nf_conntrack_helper_register(struct nf_conntrack_helper *me) -{ - int ret; - BUG_ON(me->timeout == 0); - - ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", - sizeof(struct nf_conn) - + sizeof(struct nf_conn_help) - + __alignof__(struct nf_conn_help)); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); - return ret; - } - write_lock_bh(&nf_conntrack_lock); - list_add(&me->list, &helpers); - write_unlock_bh(&nf_conntrack_lock); - - return 0; -} - -struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) -{ - struct nf_conntrack_helper *h; - - list_for_each_entry(h, &helpers, list) { - if (!strcmp(h->name, name)) - return h; - } - - return NULL; -} - -static inline void unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) -{ - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); - if (help && help->helper == me) { - nf_conntrack_event(IPCT_HELPER, ct); - help->helper = NULL; - } -} - -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) -{ - unsigned int i; - struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_expect *exp, *tmp; - - /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - list_del(&me->list); - - /* Get rid of expectations */ - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { - struct nf_conn_help *help = nfct_help(exp->master); - if (help->helper == me && del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); - } - } + /* Should be unconfirmed, so not in hash table yet */ + NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - /* Get rid of expecteds, set helpers to NULL. */ - list_for_each_entry(h, &unconfirmed, list) - unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { - list_for_each_entry(h, &nf_conntrack_hash[i], list) - unhelp(h, me); - } - write_unlock_bh(&nf_conntrack_lock); + DEBUGP("Altering reply tuple of %p to ", ct); + NF_CT_DUMP_TUPLE(newreply); - /* Someone could be still looking at the helper in a bh. */ - synchronize_net(); + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + if (!ct->master && help && help->expecting == 0) + help->helper = __nf_ct_helper_find(newreply); + write_unlock_bh(&nf_conntrack_lock); } +EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, @@ -1399,9 +925,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, ct->timeout.expires = extra_jiffies; event = IPCT_REFRESH; } else { - /* Need del_timer for race avoidance (may already be dying). */ - if (del_timer(&ct->timeout)) { - ct->timeout.expires = jiffies + extra_jiffies; + unsigned long newtime = jiffies + extra_jiffies; + + /* Only update the timeout if the new timeout is at least + HZ jiffies from the old timeout. Need del_timer for race + avoidance (may already be dying). */ + if (newtime - ct->timeout.expires >= HZ + && del_timer(&ct->timeout)) { + ct->timeout.expires = newtime; add_timer(&ct->timeout); event = IPCT_REFRESH; } @@ -1412,9 +943,10 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += skb->len - (unsigned int)(skb->nh.raw - skb->data); - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; + + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; } #endif @@ -1424,6 +956,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, if (event) nf_conntrack_event_cache(event, skb); } +EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); #if defined(CONFIG_NF_CT_NETLINK) || \ defined(CONFIG_NF_CT_NETLINK_MODULE) @@ -1448,6 +981,7 @@ int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, nfattr_failure: return -1; } +EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nfattr); static const size_t cta_min_proto[CTA_PROTO_MAX] = { [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), @@ -1463,13 +997,12 @@ int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) return -EINVAL; - t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); - t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + t->src.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0; } +EXPORT_SYMBOL_GPL(nf_ct_port_nfattr_to_tuple); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ @@ -1490,6 +1023,7 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nskb->nfctinfo = ctinfo; nf_conntrack_get(nskb->nfct); } +EXPORT_SYMBOL_GPL(__nf_conntrack_attach); static inline int do_iter(const struct nf_conntrack_tuple_hash *i, @@ -1520,9 +1054,10 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), if (iter(ct, data)) goto found; } + write_unlock_bh(&nf_conntrack_lock); return NULL; found: - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + atomic_inc(&ct->ct_general.use); write_unlock_bh(&nf_conntrack_lock); return ct; } @@ -1542,6 +1077,7 @@ nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) nf_ct_put(ct); } } +EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); static int kill_all(struct nf_conn *i, void *data) { @@ -1557,10 +1093,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) get_order(sizeof(struct list_head) * size)); } -void nf_conntrack_flush() +void nf_conntrack_flush(void) { nf_ct_iterate_cleanup(kill_all, NULL); } +EXPORT_SYMBOL_GPL(nf_conntrack_flush); /* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall. */ @@ -1598,6 +1135,8 @@ void nf_conntrack_cleanup(void) free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, nf_conntrack_htable_size); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic); + /* free l3proto protocol tables */ for (i = 0; i < PF_MAX; i++) if (nf_ct_protos[i]) { @@ -1723,10 +1262,14 @@ int __init nf_conntrack_init(void) goto err_free_conntrack_slab; } + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic); + if (ret < 0) + goto out_free_expect_slab; + /* Don't NEED lock here, but good form anyway. */ write_lock_bh(&nf_conntrack_lock); - for (i = 0; i < PF_MAX; i++) - nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; + for (i = 0; i < AF_MAX; i++) + nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic; write_unlock_bh(&nf_conntrack_lock); /* For use by REJECT target */ @@ -1740,6 +1283,8 @@ int __init nf_conntrack_init(void) return ret; +out_free_expect_slab: + kmem_cache_destroy(nf_conntrack_expect_cachep); err_free_conntrack_slab: nf_conntrack_unregister_cache(NF_CT_F_BASIC); err_free_hash: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c new file mode 100644 index 00000000000..1a223e0c085 --- /dev/null +++ b/net/netfilter/nf_conntrack_ecache.c @@ -0,0 +1,93 @@ +/* Event cache for netfilter. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> + +ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); +EXPORT_SYMBOL_GPL(nf_conntrack_chain); + +ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain); +EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); + +DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); +EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); + +/* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ +static inline void +__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) +{ + if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) + && ecache->events) + atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, + ecache->ct); + + ecache->events = 0; + nf_ct_put(ecache->ct); + ecache->ct = NULL; +} + +/* Deliver all cached events for a particular conntrack. This is called + * by code prior to async packet handling for freeing the skb */ +void nf_ct_deliver_cached_events(const struct nf_conn *ct) +{ + struct nf_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(nf_conntrack_ecache); + if (ecache->ct == ct) + __nf_ct_deliver_cached_events(ecache); + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); + +/* Deliver cached events for old pending events, if current conntrack != old */ +void __nf_ct_event_cache_init(struct nf_conn *ct) +{ + struct nf_conntrack_ecache *ecache; + + /* take care of delivering potentially old events */ + ecache = &__get_cpu_var(nf_conntrack_ecache); + BUG_ON(ecache->ct == ct); + if (ecache->ct) + __nf_ct_deliver_cached_events(ecache); + /* initialize for this conntrack/packet */ + ecache->ct = ct; + nf_conntrack_get(&ct->ct_general); +} +EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); + +/* flush the event cache - touches other CPU's data and must not be called + * while packets are still passing through the code */ +void nf_ct_event_cache_flush(void) +{ + struct nf_conntrack_ecache *ecache; + int cpu; + + for_each_possible_cpu(cpu) { + ecache = &per_cpu(nf_conntrack_ecache, cpu); + if (ecache->ct) + nf_ct_put(ecache->ct); + } +} + diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c new file mode 100644 index 00000000000..9cbf926cdd1 --- /dev/null +++ b/net/netfilter/nf_conntrack_expect.c @@ -0,0 +1,445 @@ +/* Expectation handling for nf_conntrack. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/stddef.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/percpu.h> +#include <linux/kernel.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_tuple.h> + +LIST_HEAD(nf_conntrack_expect_list); +EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); + +struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; +static unsigned int nf_conntrack_expect_next_id; + +/* nf_conntrack_expect helper functions */ +void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + struct nf_conn_help *master_help = nfct_help(exp->master); + + NF_CT_ASSERT(master_help); + NF_CT_ASSERT(!timer_pending(&exp->timeout)); + + list_del(&exp->list); + NF_CT_STAT_INC(expect_delete); + master_help->expecting--; + nf_conntrack_expect_put(exp); +} +EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct nf_conntrack_expect *exp = (void *)ul_expect; + + write_lock_bh(&nf_conntrack_lock); + nf_ct_unlink_expect(exp); + write_unlock_bh(&nf_conntrack_lock); + nf_conntrack_expect_put(exp); +} + +struct nf_conntrack_expect * +__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + return i; + } + return NULL; +} +EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find); + +/* Just find a expectation corresponding to a tuple. */ +struct nf_conntrack_expect * +nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + read_lock_bh(&nf_conntrack_lock); + i = __nf_conntrack_expect_find(tuple); + if (i) + atomic_inc(&i->use); + read_unlock_bh(&nf_conntrack_lock); + + return i; +} +EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get); + +/* If an expectation for this connection is found, it gets delete from + * global list then returned. */ +struct nf_conntrack_expect * +find_expectation(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *exp; + + exp = __nf_conntrack_expect_find(tuple); + if (!exp) + return NULL; + + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (!nf_ct_is_confirmed(exp->master)) + return NULL; + + if (exp->flags & NF_CT_EXPECT_PERMANENT) { + atomic_inc(&exp->use); + return exp; + } else if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + return exp; + } + + return NULL; +} + +/* delete all expectations for this conntrack */ +void nf_ct_remove_expectations(struct nf_conn *ct) +{ + struct nf_conntrack_expect *i, *tmp; + struct nf_conn_help *help = nfct_help(ct); + + /* Optimization: most connection never expect any others. */ + if (!help || help->expecting == 0) + return; + + list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { + if (i->master == ct && del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_conntrack_expect_put(i); + } + } +} +EXPORT_SYMBOL_GPL(nf_ct_remove_expectations); + +/* Would two expected things clash? */ +static inline int expect_clash(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct nf_conntrack_tuple intersect_mask; + int count; + + intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; + intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; + intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; + intersect_mask.dst.protonum = a->mask.dst.protonum + & b->mask.dst.protonum; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.src.u3.all[count] = + a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; + } + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.dst.u3.all[count] = + a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; + } + + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); +} + +static inline int expect_matches(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + return a->master == b->master + && nf_ct_tuple_equal(&a->tuple, &b->tuple) + && nf_ct_tuple_equal(&a->mask, &b->mask); +} + +/* Generally a bad idea to call this: could have matched already. */ +void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_expect *i; + + write_lock_bh(&nf_conntrack_lock); + /* choose the the oldest expectation to evict */ + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, exp) && del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + write_unlock_bh(&nf_conntrack_lock); + nf_conntrack_expect_put(i); + return; + } + } + write_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related); + +/* We don't increase the master conntrack refcount for non-fulfilled + * conntracks. During the conntrack destruction, the expectations are + * always killed before the conntrack itself */ +struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) +{ + struct nf_conntrack_expect *new; + + new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + if (!new) + return NULL; + + new->master = me; + atomic_set(&new->use, 1); + return new; +} +EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc); + +void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, + union nf_conntrack_address *saddr, + union nf_conntrack_address *daddr, + u_int8_t proto, __be16 *src, __be16 *dst) +{ + int len; + + if (family == AF_INET) + len = 4; + else + len = 16; + + exp->flags = 0; + exp->expectfn = NULL; + exp->helper = NULL; + exp->tuple.src.l3num = family; + exp->tuple.dst.protonum = proto; + exp->mask.src.l3num = 0xFFFF; + exp->mask.dst.protonum = 0xFF; + + if (saddr) { + memcpy(&exp->tuple.src.u3, saddr, len); + if (sizeof(exp->tuple.src.u3) > len) + /* address needs to be cleared for nf_ct_tuple_equal */ + memset((void *)&exp->tuple.src.u3 + len, 0x00, + sizeof(exp->tuple.src.u3) - len); + memset(&exp->mask.src.u3, 0xFF, len); + if (sizeof(exp->mask.src.u3) > len) + memset((void *)&exp->mask.src.u3 + len, 0x00, + sizeof(exp->mask.src.u3) - len); + } else { + memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3)); + memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3)); + } + + if (daddr) { + memcpy(&exp->tuple.dst.u3, daddr, len); + if (sizeof(exp->tuple.dst.u3) > len) + /* address needs to be cleared for nf_ct_tuple_equal */ + memset((void *)&exp->tuple.dst.u3 + len, 0x00, + sizeof(exp->tuple.dst.u3) - len); + memset(&exp->mask.dst.u3, 0xFF, len); + if (sizeof(exp->mask.dst.u3) > len) + memset((void *)&exp->mask.dst.u3 + len, 0x00, + sizeof(exp->mask.dst.u3) - len); + } else { + memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3)); + memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3)); + } + + if (src) { + exp->tuple.src.u.all = (__force u16)*src; + exp->mask.src.u.all = 0xFFFF; + } else { + exp->tuple.src.u.all = 0; + exp->mask.src.u.all = 0; + } + + if (dst) { + exp->tuple.dst.u.all = (__force u16)*dst; + exp->mask.dst.u.all = 0xFFFF; + } else { + exp->tuple.dst.u.all = 0; + exp->mask.dst.u.all = 0; + } +} +EXPORT_SYMBOL_GPL(nf_conntrack_expect_init); + +void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +{ + if (atomic_dec_and_test(&exp->use)) + kmem_cache_free(nf_conntrack_expect_cachep, exp); +} +EXPORT_SYMBOL_GPL(nf_conntrack_expect_put); + +static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +{ + struct nf_conn_help *master_help = nfct_help(exp->master); + + atomic_inc(&exp->use); + master_help->expecting++; + list_add(&exp->list, &nf_conntrack_expect_list); + + init_timer(&exp->timeout); + exp->timeout.data = (unsigned long)exp; + exp->timeout.function = expectation_timed_out; + exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; + add_timer(&exp->timeout); + + exp->id = ++nf_conntrack_expect_next_id; + atomic_inc(&exp->use); + NF_CT_STAT_INC(expect_create); +} + +/* Race with expectations being used means we could have none to find; OK. */ +static void evict_oldest_expect(struct nf_conn *master) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (i->master == master) { + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_conntrack_expect_put(i); + } + break; + } + } +} + +static inline int refresh_timer(struct nf_conntrack_expect *i) +{ + struct nf_conn_help *master_help = nfct_help(i->master); + + if (!del_timer(&i->timeout)) + return 0; + + i->timeout.expires = jiffies + master_help->helper->timeout*HZ; + add_timer(&i->timeout); + return 1; +} + +int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +{ + struct nf_conntrack_expect *i; + struct nf_conn *master = expect->master; + struct nf_conn_help *master_help = nfct_help(master); + int ret; + + NF_CT_ASSERT(master_help); + + write_lock_bh(&nf_conntrack_lock); + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, expect)) { + /* Refresh timer: if it's dying, ignore.. */ + if (refresh_timer(i)) { + ret = 0; + goto out; + } + } else if (expect_clash(i, expect)) { + ret = -EBUSY; + goto out; + } + } + /* Will be over limit? */ + if (master_help->helper->max_expected && + master_help->expecting >= master_help->helper->max_expected) + evict_oldest_expect(master); + + nf_conntrack_expect_insert(expect); + nf_conntrack_expect_event(IPEXP_NEW, expect); + ret = 0; +out: + write_unlock_bh(&nf_conntrack_lock); + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_expect_related); + +#ifdef CONFIG_PROC_FS +static void *exp_seq_start(struct seq_file *s, loff_t *pos) +{ + struct list_head *e = &nf_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + read_lock_bh(&nf_conntrack_lock); + + if (list_empty(e)) + return NULL; + + for (i = 0; i <= *pos; i++) { + e = e->next; + if (e == &nf_conntrack_expect_list) + return NULL; + } + return e; +} + +static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct list_head *e = v; + + ++*pos; + e = e->next; + + if (e == &nf_conntrack_expect_list) + return NULL; + + return e; +} + +static void exp_seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&nf_conntrack_lock); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *expect = v; + + if (expect->timeout.function) + seq_printf(s, "%ld ", timer_pending(&expect->timeout) + ? (long)(expect->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + seq_printf(s, "l3proto = %u proto=%u ", + expect->tuple.src.l3num, + expect->tuple.dst.protonum); + print_tuple(s, &expect->tuple, + __nf_ct_l3proto_find(expect->tuple.src.l3num), + __nf_ct_l4proto_find(expect->tuple.src.l3num, + expect->tuple.dst.protonum)); + return seq_putc(s, '\n'); +} + +static struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &exp_seq_ops); +} + +struct file_operations exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; +#endif /* CONFIG_PROC_FS */ diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 0c17a5bd112..92a94716876 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -26,12 +26,15 @@ #include <net/tcp.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_ftp.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp connection tracking helper"); +MODULE_ALIAS("ip_conntrack_ftp"); /* This is slow, but it's simple. --RR */ static char *ftp_buffer; @@ -48,7 +51,7 @@ module_param(loose, bool, 0600); unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, - enum ip_ct_ftp_type type, + enum nf_ct_ftp_type type, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp, @@ -71,7 +74,7 @@ static struct ftp_search { size_t plen; char skip; char term; - enum ip_ct_ftp_type ftptype; + enum nf_ct_ftp_type ftptype; int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); } search[IP_CT_DIR_MAX][2] = { [IP_CT_DIR_ORIGINAL] = { @@ -80,7 +83,7 @@ static struct ftp_search { .plen = sizeof("PORT") - 1, .skip = ' ', .term = '\r', - .ftptype = IP_CT_FTP_PORT, + .ftptype = NF_CT_FTP_PORT, .getnum = try_rfc959, }, { @@ -88,7 +91,7 @@ static struct ftp_search { .plen = sizeof("EPRT") - 1, .skip = ' ', .term = '\r', - .ftptype = IP_CT_FTP_EPRT, + .ftptype = NF_CT_FTP_EPRT, .getnum = try_eprt, }, }, @@ -98,7 +101,7 @@ static struct ftp_search { .plen = sizeof("227 ") - 1, .skip = '(', .term = ')', - .ftptype = IP_CT_FTP_PASV, + .ftptype = NF_CT_FTP_PASV, .getnum = try_rfc959, }, { @@ -106,7 +109,7 @@ static struct ftp_search { .plen = sizeof("229 ") - 1, .skip = '(', .term = ')', - .ftptype = IP_CT_FTP_EPSV, + .ftptype = NF_CT_FTP_EPSV, .getnum = try_epsv_response, }, }, @@ -171,7 +174,7 @@ static int try_rfc959(const char *data, size_t dlen, /* Grab port: number up to delimiter */ static int get_port(const char *data, int start, size_t dlen, char delim, - u_int16_t *port) + __be16 *port) { u_int16_t tmp_port = 0; int i; @@ -317,7 +320,7 @@ static int find_pattern(const char *data, size_t dlen, } /* Look up to see if we're just after a \n. */ -static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) +static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) { unsigned int i; @@ -328,7 +331,7 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, +static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -364,12 +367,12 @@ static int help(struct sk_buff **pskb, u32 seq; int dir = CTINFO2DIR(ctinfo); unsigned int matchlen, matchoff; - struct ip_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; + struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; struct nf_conntrack_expect *exp; struct nf_conntrack_man cmd = {}; - unsigned int i; int found = 0, ends_in_nl; + typeof(nf_nat_ftp_hook) nf_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED @@ -500,12 +503,12 @@ static int help(struct sk_buff **pskb, .u = { .tcp = { 0 }}, }, .dst = { .protonum = 0xFF, - .u = { .tcp = { 0xFFFF }}, + .u = { .tcp = { __constant_htons(0xFFFF) }}, }, }; if (cmd.l3num == PF_INET) { - exp->mask.src.u3.ip = 0xFFFFFFFF; - exp->mask.dst.u3.ip = 0xFFFFFFFF; + exp->mask.src.u3.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); } else { memset(exp->mask.src.u3.ip6, 0xFF, sizeof(exp->mask.src.u3.ip6)); @@ -514,13 +517,15 @@ static int help(struct sk_buff **pskb, } exp->expectfn = NULL; + exp->helper = NULL; exp->flags = 0; /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ - if (nf_nat_ftp_hook) - ret = nf_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype, - matchoff, matchlen, exp, &seq); + nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); + if (nf_nat_ftp && ct->status & IPS_NAT_MASK) + ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, + matchoff, matchlen, exp, &seq); else { /* Can't expect this? Best to drop packet now. */ if (nf_conntrack_expect_related(exp) != 0) @@ -584,7 +589,8 @@ static int __init nf_conntrack_ftp_init(void) for (j = 0; j < 2; j++) { ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; - ftp[i][j].mask.src.u.tcp.port = 0xFFFF; + ftp[i][j].mask.src.l3num = 0xFFFF; + ftp[i][j].mask.src.u.tcp.port = htons(0xFFFF); ftp[i][j].mask.dst.protonum = 0xFF; ftp[i][j].max_expected = 1; ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 26dfecadb33..f6fad713d48 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -15,7 +15,7 @@ #else #include <stdio.h> #endif -#include <linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h> +#include <linux/netfilter/nf_conntrack_h323_asn1.h> /* Trace Flag */ #ifndef H323_TRACE @@ -144,7 +144,7 @@ static decoder_t Decoders[] = { /**************************************************************************** * H.323 Types ****************************************************************************/ -#include "ip_conntrack_helper_h323_types.c" +#include "nf_conntrack_h323_types.c" /**************************************************************************** * Functions diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c new file mode 100644 index 00000000000..6d8568959f8 --- /dev/null +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -0,0 +1,1856 @@ +/* + * H.323 connection tracking helper + * + * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * + * This source code is licensed under General Public License version 2. + * + * Based on the 'brute force' H.323 connection tracking module by + * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * For more information, please see http://nath323.sourceforge.net/ + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/ctype.h> +#include <linux/inet.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip6_route.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_h323.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* Parameters */ +static unsigned int default_rrq_ttl __read_mostly = 300; +module_param(default_rrq_ttl, uint, 0600); +MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ"); + +static int gkrouted_only __read_mostly = 1; +module_param(gkrouted_only, int, 0600); +MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper"); + +static int callforward_filter __read_mostly = 1; +module_param(callforward_filter, bool, 0600); +MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " + "if both endpoints are on different sides " + "(determined by routing information)"); + +/* Hooks for NAT */ +int (*set_h245_addr_hook) (struct sk_buff **pskb, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 port) + __read_mostly; +int (*set_h225_addr_hook) (struct sk_buff **pskb, + unsigned char **data, int dataoff, + TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 port) + __read_mostly; +int (*set_sig_addr_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, + TransportAddress *taddr, int count) __read_mostly; +int (*set_ras_addr_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, + TransportAddress *taddr, int count) __read_mostly; +int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) __read_mostly; +int (*nat_t120_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) __read_mostly; +int (*nat_h245_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) __read_mostly; +int (*nat_callforwarding_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) __read_mostly; +int (*nat_q931_hook) (struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp) + __read_mostly; + +static DEFINE_SPINLOCK(nf_h323_lock); +static char *h323_buffer; + +static struct nf_conntrack_helper nf_conntrack_helper_h245; +static struct nf_conntrack_helper nf_conntrack_helper_q931[]; +static struct nf_conntrack_helper nf_conntrack_helper_ras[]; + +/****************************************************************************/ +static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned char **data, int *datalen, int *dataoff) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + struct tcphdr _tcph, *th; + int tcpdatalen; + int tcpdataoff; + unsigned char *tpkt; + int tpktlen; + int tpktoff; + + /* Get TCP header */ + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return 0; + + /* Get TCP data offset */ + tcpdataoff = protoff + th->doff * 4; + + /* Get TCP data length */ + tcpdatalen = (*pskb)->len - tcpdataoff; + if (tcpdatalen <= 0) /* No TCP data */ + goto clear_out; + + if (*data == NULL) { /* first TPKT */ + /* Get first TPKT pointer */ + tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen, + h323_buffer); + BUG_ON(tpkt == NULL); + + /* Validate TPKT identifier */ + if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) { + /* Netmeeting sends TPKT header and data separately */ + if (info->tpkt_len[dir] > 0) { + DEBUGP("nf_ct_h323: previous packet " + "indicated separate TPKT data of %hu " + "bytes\n", info->tpkt_len[dir]); + if (info->tpkt_len[dir] <= tcpdatalen) { + /* Yes, there was a TPKT header + * received */ + *data = tpkt; + *datalen = info->tpkt_len[dir]; + *dataoff = 0; + goto out; + } + + /* Fragmented TPKT */ + if (net_ratelimit()) + printk("nf_ct_h323: " + "fragmented TPKT\n"); + goto clear_out; + } + + /* It is not even a TPKT */ + return 0; + } + tpktoff = 0; + } else { /* Next TPKT */ + tpktoff = *dataoff + *datalen; + tcpdatalen -= tpktoff; + if (tcpdatalen <= 4) /* No more TPKT */ + goto clear_out; + tpkt = *data + *datalen; + + /* Validate TPKT identifier */ + if (tpkt[0] != 0x03 || tpkt[1] != 0) + goto clear_out; + } + + /* Validate TPKT length */ + tpktlen = tpkt[2] * 256 + tpkt[3]; + if (tpktlen < 4) + goto clear_out; + if (tpktlen > tcpdatalen) { + if (tcpdatalen == 4) { /* Separate TPKT header */ + /* Netmeeting sends TPKT header and data separately */ + DEBUGP("nf_ct_h323: separate TPKT header indicates " + "there will be TPKT data of %hu bytes\n", + tpktlen - 4); + info->tpkt_len[dir] = tpktlen - 4; + return 0; + } + + if (net_ratelimit()) + printk("nf_ct_h323: incomplete TPKT (fragmented?)\n"); + goto clear_out; + } + + /* This is the encapsulated data */ + *data = tpkt + 4; + *datalen = tpktlen - 4; + *dataoff = tpktoff + 4; + + out: + /* Clear TPKT length */ + info->tpkt_len[dir] = 0; + return 1; + + clear_out: + info->tpkt_len[dir] = 0; + return 0; +} + +/****************************************************************************/ +static int get_h245_addr(struct nf_conn *ct, unsigned char *data, + H245_TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 *port) +{ + unsigned char *p; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int len; + + if (taddr->choice != eH245_TransportAddress_unicastAddress) + return 0; + + switch (taddr->unicastAddress.choice) { + case eUnicastAddress_iPAddress: + if (family != AF_INET) + return 0; + p = data + taddr->unicastAddress.iPAddress.network; + len = 4; + break; + case eUnicastAddress_iP6Address: + if (family != AF_INET6) + return 0; + p = data + taddr->unicastAddress.iP6Address.network; + len = 16; + break; + default: + return 0; + } + + memcpy(addr, p, len); + memset((void *)addr + len, 0, sizeof(*addr) - len); + memcpy(port, p + len, sizeof(__be16)); + + return 1; +} + +/****************************************************************************/ +static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + __be16 rtp_port, rtcp_port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *rtp_exp; + struct nf_conntrack_expect *rtcp_exp; + typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp; + + /* Read RTP or RTCP address */ + if (!get_h245_addr(ct, *data, taddr, &addr, &port) || + memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) || + port == 0) + return 0; + + /* RTP port is even */ + port &= htons(~1); + rtp_port = port; + rtcp_port = htons(ntohs(port) + 1); + + /* Create expect for RTP */ + if ((rtp_exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtp_port); + + /* Create expect for RTCP */ + if ((rtcp_exp = nf_conntrack_expect_alloc(ct)) == NULL) { + nf_conntrack_expect_put(rtp_exp); + return -1; + } + nf_conntrack_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtcp_port); + + if (memcmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + sizeof(ct->tuplehash[dir].tuple.src.u3)) && + (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && + ct->status & IPS_NAT_MASK) { + /* NAT needed */ + ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + taddr, port, rtp_port, rtp_exp, rtcp_exp); + } else { /* Conntrack only */ + if (nf_conntrack_expect_related(rtp_exp) == 0) { + if (nf_conntrack_expect_related(rtcp_exp) == 0) { + DEBUGP("nf_ct_h323: expect RTP "); + NF_CT_DUMP_TUPLE(&rtp_exp->tuple); + DEBUGP("nf_ct_h323: expect RTCP "); + NF_CT_DUMP_TUPLE(&rtcp_exp->tuple); + } else { + nf_conntrack_unexpect_related(rtp_exp); + ret = -1; + } + } else + ret = -1; + } + + nf_conntrack_expect_put(rtp_exp); + nf_conntrack_expect_put(rtcp_exp); + + return ret; +} + +/****************************************************************************/ +static int expect_t120(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + typeof(nat_t120_hook) nat_t120; + + /* Read T.120 address */ + if (!get_h245_addr(ct, *data, taddr, &addr, &port) || + memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) || + port == 0) + return 0; + + /* Create expect for T.120 connections */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); + exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */ + + if (memcmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + sizeof(ct->tuplehash[dir].tuple.src.u3)) && + (nat_t120 = rcu_dereference(nat_t120_hook)) && + ct->status & IPS_NAT_MASK) { + /* NAT needed */ + ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, + port, exp); + } else { /* Conntrack only */ + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_h323: expect T.120 "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + } + + nf_conntrack_expect_put(exp); + + return ret; +} + +/****************************************************************************/ +static int process_h245_channel(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + H2250LogicalChannelParameters *channel) +{ + int ret; + + if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { + /* RTP */ + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + &channel->mediaChannel); + if (ret < 0) + return -1; + } + + if (channel-> + options & eH2250LogicalChannelParameters_mediaControlChannel) { + /* RTCP */ + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + &channel->mediaControlChannel); + if (ret < 0) + return -1; + } + + return 0; +} + +/****************************************************************************/ +static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + OpenLogicalChannel *olc) +{ + int ret; + + DEBUGP("nf_ct_h323: OpenLogicalChannel\n"); + + if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == + eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) + { + ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + &olc-> + forwardLogicalChannelParameters. + multiplexParameters. + h2250LogicalChannelParameters); + if (ret < 0) + return -1; + } + + if ((olc->options & + eOpenLogicalChannel_reverseLogicalChannelParameters) && + (olc->reverseLogicalChannelParameters.options & + eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters) + && (olc->reverseLogicalChannelParameters.multiplexParameters. + choice == + eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) + { + ret = + process_h245_channel(pskb, ct, ctinfo, data, dataoff, + &olc-> + reverseLogicalChannelParameters. + multiplexParameters. + h2250LogicalChannelParameters); + if (ret < 0) + return -1; + } + + if ((olc->options & eOpenLogicalChannel_separateStack) && + olc->forwardLogicalChannelParameters.dataType.choice == + eDataType_data && + olc->forwardLogicalChannelParameters.dataType.data.application. + choice == eDataApplicationCapability_application_t120 && + olc->forwardLogicalChannelParameters.dataType.data.application. + t120.choice == eDataProtocolCapability_separateLANStack && + olc->separateStack.networkAddress.choice == + eNetworkAccessParameters_networkAddress_localAreaAddress) { + ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + &olc->separateStack.networkAddress. + localAreaAddress); + if (ret < 0) + return -1; + } + + return 0; +} + +/****************************************************************************/ +static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + OpenLogicalChannelAck *olca) +{ + H2250LogicalChannelAckParameters *ack; + int ret; + + DEBUGP("nf_ct_h323: OpenLogicalChannelAck\n"); + + if ((olca->options & + eOpenLogicalChannelAck_reverseLogicalChannelParameters) && + (olca->reverseLogicalChannelParameters.options & + eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters) + && (olca->reverseLogicalChannelParameters.multiplexParameters. + choice == + eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) + { + ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + &olca-> + reverseLogicalChannelParameters. + multiplexParameters. + h2250LogicalChannelParameters); + if (ret < 0) + return -1; + } + + if ((olca->options & + eOpenLogicalChannelAck_forwardMultiplexAckParameters) && + (olca->forwardMultiplexAckParameters.choice == + eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters)) + { + ack = &olca->forwardMultiplexAckParameters. + h2250LogicalChannelAckParameters; + if (ack->options & + eH2250LogicalChannelAckParameters_mediaChannel) { + /* RTP */ + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + &ack->mediaChannel); + if (ret < 0) + return -1; + } + + if (ack->options & + eH2250LogicalChannelAckParameters_mediaControlChannel) { + /* RTCP */ + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + &ack->mediaControlChannel); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + MultimediaSystemControlMessage *mscm) +{ + switch (mscm->choice) { + case eMultimediaSystemControlMessage_request: + if (mscm->request.choice == + eRequestMessage_openLogicalChannel) { + return process_olc(pskb, ct, ctinfo, data, dataoff, + &mscm->request.openLogicalChannel); + } + DEBUGP("nf_ct_h323: H.245 Request %d\n", + mscm->request.choice); + break; + case eMultimediaSystemControlMessage_response: + if (mscm->response.choice == + eResponseMessage_openLogicalChannelAck) { + return process_olca(pskb, ct, ctinfo, data, dataoff, + &mscm->response. + openLogicalChannelAck); + } + DEBUGP("nf_ct_h323: H.245 Response %d\n", + mscm->response.choice); + break; + default: + DEBUGP("nf_ct_h323: H.245 signal %d\n", mscm->choice); + break; + } + + return 0; +} + +/****************************************************************************/ +static int h245_help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + static MultimediaSystemControlMessage mscm; + unsigned char *data = NULL; + int datalen; + int dataoff; + int ret; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + return NF_ACCEPT; + } + DEBUGP("nf_ct_h245: skblen = %u\n", (*pskb)->len); + + spin_lock_bh(&nf_h323_lock); + + /* Process each TPKT */ + while (get_tpkt_data(pskb, protoff, ct, ctinfo, + &data, &datalen, &dataoff)) { + DEBUGP("nf_ct_h245: TPKT len=%d ", datalen); + NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); + + /* Decode H.245 signal */ + ret = DecodeMultimediaSystemControlMessage(data, datalen, + &mscm); + if (ret < 0) { + if (net_ratelimit()) + printk("nf_ct_h245: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); + /* We don't drop when decoding error */ + break; + } + + /* Process H.245 signal */ + if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0) + goto drop; + } + + spin_unlock_bh(&nf_h323_lock); + return NF_ACCEPT; + + drop: + spin_unlock_bh(&nf_h323_lock); + if (net_ratelimit()) + printk("nf_ct_h245: packet dropped\n"); + return NF_DROP; +} + +/****************************************************************************/ +static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { + .name = "H.245", + .me = THIS_MODULE, + .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */, + .timeout = 240, + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .help = h245_help +}; + +/****************************************************************************/ +int get_h225_addr(struct nf_conn *ct, unsigned char *data, + TransportAddress *taddr, + union nf_conntrack_address *addr, __be16 *port) +{ + unsigned char *p; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int len; + + switch (taddr->choice) { + case eTransportAddress_ipAddress: + if (family != AF_INET) + return 0; + p = data + taddr->ipAddress.ip; + len = 4; + break; + case eTransportAddress_ip6Address: + if (family != AF_INET6) + return 0; + p = data + taddr->ip6Address.ip6; + len = 16; + break; + default: + return 0; + } + + memcpy(addr, p, len); + memset((void *)addr + len, 0, sizeof(*addr) - len); + memcpy(port, p + len, sizeof(__be16)); + + return 1; +} + +/****************************************************************************/ +static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + typeof(nat_h245_hook) nat_h245; + + /* Read h245Address */ + if (!get_h225_addr(ct, *data, taddr, &addr, &port) || + memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) || + port == 0) + return 0; + + /* Create expect for h245 connection */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); + exp->helper = &nf_conntrack_helper_h245; + + if (memcmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + sizeof(ct->tuplehash[dir].tuple.src.u3)) && + (nat_h245 = rcu_dereference(nat_h245_hook)) && + ct->status & IPS_NAT_MASK) { + /* NAT needed */ + ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, + port, exp); + } else { /* Conntrack only */ + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_q931: expect H.245 "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + } + + nf_conntrack_expect_put(exp); + + return ret; +} + +/* If the calling party is on the same side of the forward-to party, + * we don't need to track the second call */ +static int callforward_do_filter(union nf_conntrack_address *src, + union nf_conntrack_address *dst, + int family) +{ + struct flowi fl1, fl2; + int ret = 0; + + memset(&fl1, 0, sizeof(fl1)); + memset(&fl2, 0, sizeof(fl2)); + + switch (family) { + case AF_INET: { + struct rtable *rt1, *rt2; + + fl1.fl4_dst = src->ip; + fl2.fl4_dst = dst->ip; + if (ip_route_output_key(&rt1, &fl1) == 0) { + if (ip_route_output_key(&rt2, &fl2) == 0) { + if (rt1->rt_gateway == rt2->rt_gateway && + rt1->u.dst.dev == rt2->u.dst.dev) + ret = 1; + dst_release(&rt2->u.dst); + } + dst_release(&rt1->u.dst); + } + break; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: { + struct rt6_info *rt1, *rt2; + + memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); + memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); + rt1 = (struct rt6_info *)ip6_route_output(NULL, &fl1); + if (rt1) { + rt2 = (struct rt6_info *)ip6_route_output(NULL, &fl2); + if (rt2) { + if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, + sizeof(rt1->rt6i_gateway)) && + rt1->u.dst.dev == rt2->u.dst.dev) + ret = 1; + dst_release(&rt2->u.dst); + } + dst_release(&rt1->u.dst); + } + break; + } +#endif + } + return ret; + +} + +/****************************************************************************/ +static int expect_callforwarding(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress *taddr) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + typeof(nat_callforwarding_hook) nat_callforwarding; + + /* Read alternativeAddress */ + if (!get_h225_addr(ct, *data, taddr, &addr, &port) || port == 0) + return 0; + + /* If the calling party is on the same side of the forward-to party, + * we don't need to track the second call */ + if (callforward_filter && + callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3, + ct->tuplehash[!dir].tuple.src.l3num)) { + DEBUGP("nf_ct_q931: Call Forwarding not tracked\n"); + return 0; + } + + /* Create expect for the second call leg */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); + exp->helper = nf_conntrack_helper_q931; + + if (memcmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + sizeof(ct->tuplehash[dir].tuple.src.u3)) && + (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && + ct->status & IPS_NAT_MASK) { + /* Need NAT */ + ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, + taddr, port, exp); + } else { /* Conntrack only */ + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_q931: expect Call Forwarding "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + } + + nf_conntrack_expect_put(exp); + + return ret; +} + +/****************************************************************************/ +static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Setup_UUIE *setup) +{ + int dir = CTINFO2DIR(ctinfo); + int ret; + int i; + __be16 port; + union nf_conntrack_address addr; + typeof(set_h225_addr_hook) set_h225_addr; + + DEBUGP("nf_ct_q931: Setup\n"); + + if (setup->options & eSetup_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &setup->h245Address); + if (ret < 0) + return -1; + } + + set_h225_addr = rcu_dereference(set_h225_addr_hook); + if ((setup->options & eSetup_UUIE_destCallSignalAddress) && + (set_h225_addr) && ct->status && IPS_NAT_MASK && + get_h225_addr(ct, *data, &setup->destCallSignalAddress, + &addr, &port) && + memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { + DEBUGP("nf_ct_q931: set destCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), + ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); + ret = set_h225_addr(pskb, data, dataoff, + &setup->destCallSignalAddress, + &ct->tuplehash[!dir].tuple.src.u3, + ct->tuplehash[!dir].tuple.src.u.tcp.port); + if (ret < 0) + return -1; + } + + if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) && + (set_h225_addr) && ct->status & IPS_NAT_MASK && + get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, + &addr, &port) && + memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { + DEBUGP("nf_ct_q931: set sourceCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), + ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); + ret = set_h225_addr(pskb, data, dataoff, + &setup->sourceCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple.dst.u.tcp.port); + if (ret < 0) + return -1; + } + + if (setup->options & eSetup_UUIE_fastStart) { + for (i = 0; i < setup->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &setup->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_callproceeding(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + CallProceeding_UUIE *callproc) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: CallProceeding\n"); + + if (callproc->options & eCallProceeding_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &callproc->h245Address); + if (ret < 0) + return -1; + } + + if (callproc->options & eCallProceeding_UUIE_fastStart) { + for (i = 0; i < callproc->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &callproc->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Connect_UUIE *connect) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: Connect\n"); + + if (connect->options & eConnect_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &connect->h245Address); + if (ret < 0) + return -1; + } + + if (connect->options & eConnect_UUIE_fastStart) { + for (i = 0; i < connect->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &connect->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Alerting_UUIE *alert) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: Alerting\n"); + + if (alert->options & eAlerting_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &alert->h245Address); + if (ret < 0) + return -1; + } + + if (alert->options & eAlerting_UUIE_fastStart) { + for (i = 0; i < alert->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &alert->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_information(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Information_UUIE *info) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: Information\n"); + + if (info->options & eInformation_UUIE_fastStart) { + for (i = 0; i < info->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &info->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Facility_UUIE *facility) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: Facility\n"); + + if (facility->reason.choice == eFacilityReason_callForwarded) { + if (facility->options & eFacility_UUIE_alternativeAddress) + return expect_callforwarding(pskb, ct, ctinfo, data, + dataoff, + &facility-> + alternativeAddress); + return 0; + } + + if (facility->options & eFacility_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &facility->h245Address); + if (ret < 0) + return -1; + } + + if (facility->options & eFacility_UUIE_fastStart) { + for (i = 0; i < facility->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &facility->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + Progress_UUIE *progress) +{ + int ret; + int i; + + DEBUGP("nf_ct_q931: Progress\n"); + + if (progress->options & eProgress_UUIE_h245Address) { + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + &progress->h245Address); + if (ret < 0) + return -1; + } + + if (progress->options & eProgress_UUIE_fastStart) { + for (i = 0; i < progress->fastStart.count; i++) { + ret = process_olc(pskb, ct, ctinfo, data, dataoff, + &progress->fastStart.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, Q931 *q931) +{ + H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu; + int i; + int ret = 0; + + switch (pdu->h323_message_body.choice) { + case eH323_UU_PDU_h323_message_body_setup: + ret = process_setup(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body.setup); + break; + case eH323_UU_PDU_h323_message_body_callProceeding: + ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body. + callProceeding); + break; + case eH323_UU_PDU_h323_message_body_connect: + ret = process_connect(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body.connect); + break; + case eH323_UU_PDU_h323_message_body_alerting: + ret = process_alerting(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body.alerting); + break; + case eH323_UU_PDU_h323_message_body_information: + ret = process_information(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body. + information); + break; + case eH323_UU_PDU_h323_message_body_facility: + ret = process_facility(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body.facility); + break; + case eH323_UU_PDU_h323_message_body_progress: + ret = process_progress(pskb, ct, ctinfo, data, dataoff, + &pdu->h323_message_body.progress); + break; + default: + DEBUGP("nf_ct_q931: Q.931 signal %d\n", + pdu->h323_message_body.choice); + break; + } + + if (ret < 0) + return -1; + + if (pdu->options & eH323_UU_PDU_h245Control) { + for (i = 0; i < pdu->h245Control.count; i++) { + ret = process_h245(pskb, ct, ctinfo, data, dataoff, + &pdu->h245Control.item[i]); + if (ret < 0) + return -1; + } + } + + return 0; +} + +/****************************************************************************/ +static int q931_help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + static Q931 q931; + unsigned char *data = NULL; + int datalen; + int dataoff; + int ret; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + return NF_ACCEPT; + } + DEBUGP("nf_ct_q931: skblen = %u\n", (*pskb)->len); + + spin_lock_bh(&nf_h323_lock); + + /* Process each TPKT */ + while (get_tpkt_data(pskb, protoff, ct, ctinfo, + &data, &datalen, &dataoff)) { + DEBUGP("nf_ct_q931: TPKT len=%d ", datalen); + NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); + + /* Decode Q.931 signal */ + ret = DecodeQ931(data, datalen, &q931); + if (ret < 0) { + if (net_ratelimit()) + printk("nf_ct_q931: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); + /* We don't drop when decoding error */ + break; + } + + /* Process Q.931 signal */ + if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0) + goto drop; + } + + spin_unlock_bh(&nf_h323_lock); + return NF_ACCEPT; + + drop: + spin_unlock_bh(&nf_h323_lock); + if (net_ratelimit()) + printk("nf_ct_q931: packet dropped\n"); + return NF_DROP; +} + +/****************************************************************************/ +static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { + { + .name = "Q.931", + .me = THIS_MODULE, + /* T.120 and H.245 */ + .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4, + .timeout = 240, + .tuple.src.l3num = AF_INET, + .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), + .tuple.dst.protonum = IPPROTO_TCP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.tcp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .help = q931_help + }, + { + .name = "Q.931", + .me = THIS_MODULE, + /* T.120 and H.245 */ + .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4, + .timeout = 240, + .tuple.src.l3num = AF_INET6, + .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), + .tuple.dst.protonum = IPPROTO_TCP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.tcp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .help = q931_help + }, +}; + +/****************************************************************************/ +static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff, + int *datalen) +{ + struct udphdr _uh, *uh; + int dataoff; + + uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh); + if (uh == NULL) + return NULL; + dataoff = protoff + sizeof(_uh); + if (dataoff >= (*pskb)->len) + return NULL; + *datalen = (*pskb)->len - dataoff; + return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); +} + +/****************************************************************************/ +static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, + union nf_conntrack_address *addr, + __be16 port) +{ + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple tuple; + + memset(&tuple.src.u3, 0, sizeof(tuple.src.u3)); + tuple.src.u.tcp.port = 0; + memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3)); + tuple.dst.u.tcp.port = port; + tuple.dst.protonum = IPPROTO_TCP; + + exp = __nf_conntrack_expect_find(&tuple); + if (exp && exp->master == ct) + return exp; + return NULL; +} + +/****************************************************************************/ +static int set_expect_timeout(struct nf_conntrack_expect *exp, + unsigned timeout) +{ + if (!exp || !del_timer(&exp->timeout)) + return 0; + + exp->timeout.expires = jiffies + timeout * HZ; + add_timer(&exp->timeout); + + return 1; +} + +/****************************************************************************/ +static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, + TransportAddress *taddr, int count) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + int i; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + typeof(nat_q931_hook) nat_q931; + + /* Look for the first related address */ + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && + memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, + sizeof(addr)) == 0 && port != 0) + break; + } + + if (i >= count) /* Not found */ + return 0; + + /* Create expect for Q.931 */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + gkrouted_only ? /* only accept calls from GK? */ + &ct->tuplehash[!dir].tuple.src.u3 : + NULL, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); + exp->helper = nf_conntrack_helper_q931; + exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ + + nat_q931 = rcu_dereference(nat_q931_hook); + if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ + ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); + } else { /* Conntrack only */ + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_ras: expect Q.931 "); + NF_CT_DUMP_TUPLE(&exp->tuple); + + /* Save port for looking up expect in processing RCF */ + info->sig_port[dir] = port; + } else + ret = -1; + } + + nf_conntrack_expect_put(exp); + + return ret; +} + +/****************************************************************************/ +static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, GatekeeperRequest *grq) +{ + typeof(set_ras_addr_hook) set_ras_addr; + + DEBUGP("nf_ct_ras: GRQ\n"); + + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ + return set_ras_addr(pskb, ct, ctinfo, data, + &grq->rasAddress, 1); + return 0; +} + +/****************************************************************************/ +static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, GatekeeperConfirm *gcf) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + + DEBUGP("nf_ct_ras: GCF\n"); + + if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port)) + return 0; + + /* Registration port is the same as discovery port */ + if (!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && + port == ct->tuplehash[dir].tuple.src.u.udp.port) + return 0; + + /* Avoid RAS expectation loops. A GCF is never expected. */ + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + return 0; + + /* Need new expect */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_UDP, NULL, &port); + exp->helper = nf_conntrack_helper_ras; + + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_ras: expect RAS "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + + nf_conntrack_expect_put(exp); + + return ret; +} + +/****************************************************************************/ +static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, RegistrationRequest *rrq) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int ret; + typeof(set_ras_addr_hook) set_ras_addr; + + DEBUGP("nf_ct_ras: RRQ\n"); + + ret = expect_q931(pskb, ct, ctinfo, data, + rrq->callSignalAddress.item, + rrq->callSignalAddress.count); + if (ret < 0) + return -1; + + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr && ct->status & IPS_NAT_MASK) { + ret = set_ras_addr(pskb, ct, ctinfo, data, + rrq->rasAddress.item, + rrq->rasAddress.count); + if (ret < 0) + return -1; + } + + if (rrq->options & eRegistrationRequest_timeToLive) { + DEBUGP("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive); + info->timeout = rrq->timeToLive; + } else + info->timeout = default_rrq_ttl; + + return 0; +} + +/****************************************************************************/ +static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, RegistrationConfirm *rcf) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + int ret; + struct nf_conntrack_expect *exp; + typeof(set_sig_addr_hook) set_sig_addr; + + DEBUGP("nf_ct_ras: RCF\n"); + + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr && ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + rcf->callSignalAddress.item, + rcf->callSignalAddress.count); + if (ret < 0) + return -1; + } + + if (rcf->options & eRegistrationConfirm_timeToLive) { + DEBUGP("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive); + info->timeout = rcf->timeToLive; + } + + if (info->timeout > 0) { + DEBUGP + ("nf_ct_ras: set RAS connection timeout to %u seconds\n", + info->timeout); + nf_ct_refresh(ct, *pskb, info->timeout * HZ); + + /* Set expect timeout */ + read_lock_bh(&nf_conntrack_lock); + exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, + info->sig_port[!dir]); + if (exp) { + DEBUGP("nf_ct_ras: set Q.931 expect " + "timeout to %u seconds for", + info->timeout); + NF_CT_DUMP_TUPLE(&exp->tuple); + set_expect_timeout(exp, info->timeout); + } + read_unlock_bh(&nf_conntrack_lock); + } + + return 0; +} + +/****************************************************************************/ +static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, UnregistrationRequest *urq) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + int ret; + typeof(set_sig_addr_hook) set_sig_addr; + + DEBUGP("nf_ct_ras: URQ\n"); + + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr && ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + urq->callSignalAddress.item, + urq->callSignalAddress.count); + if (ret < 0) + return -1; + } + + /* Clear old expect */ + nf_ct_remove_expectations(ct); + info->sig_port[dir] = 0; + info->sig_port[!dir] = 0; + + /* Give it 30 seconds for UCF or URJ */ + nf_ct_refresh(ct, *pskb, 30 * HZ); + + return 0; +} + +/****************************************************************************/ +static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, AdmissionRequest *arq) +{ + struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + int dir = CTINFO2DIR(ctinfo); + __be16 port; + union nf_conntrack_address addr; + typeof(set_h225_addr_hook) set_h225_addr; + + DEBUGP("nf_ct_ras: ARQ\n"); + + set_h225_addr = rcu_dereference(set_h225_addr_hook); + if ((arq->options & eAdmissionRequest_destCallSignalAddress) && + get_h225_addr(ct, *data, &arq->destCallSignalAddress, + &addr, &port) && + !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && + port == info->sig_port[dir] && + set_h225_addr && ct->status & IPS_NAT_MASK) { + /* Answering ARQ */ + return set_h225_addr(pskb, data, 0, + &arq->destCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); + } + + if ((arq->options & eAdmissionRequest_srcCallSignalAddress) && + get_h225_addr(ct, *data, &arq->srcCallSignalAddress, + &addr, &port) && + !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && + set_h225_addr && ct->status & IPS_NAT_MASK) { + /* Calling ARQ */ + return set_h225_addr(pskb, data, 0, + &arq->srcCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + port); + } + + return 0; +} + +/****************************************************************************/ +static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, AdmissionConfirm *acf) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + typeof(set_sig_addr_hook) set_sig_addr; + + DEBUGP("nf_ct_ras: ACF\n"); + + if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress, + &addr, &port)) + return 0; + + if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) { + /* Answering ACF */ + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr && ct->status & IPS_NAT_MASK) + return set_sig_addr(pskb, ct, ctinfo, data, + &acf->destCallSignalAddress, 1); + return 0; + } + + /* Need new expect */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); + exp->flags = NF_CT_EXPECT_PERMANENT; + exp->helper = nf_conntrack_helper_q931; + + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_ras: expect Q.931 "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + + nf_conntrack_expect_put(exp); + + return ret; +} + +/****************************************************************************/ +static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, LocationRequest *lrq) +{ + typeof(set_ras_addr_hook) set_ras_addr; + + DEBUGP("nf_ct_ras: LRQ\n"); + + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr && ct->status & IPS_NAT_MASK) + return set_ras_addr(pskb, ct, ctinfo, data, + &lrq->replyAddress, 1); + return 0; +} + +/****************************************************************************/ +static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, LocationConfirm *lcf) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + __be16 port; + union nf_conntrack_address addr; + struct nf_conntrack_expect *exp; + + DEBUGP("nf_ct_ras: LCF\n"); + + if (!get_h225_addr(ct, *data, &lcf->callSignalAddress, + &addr, &port)) + return 0; + + /* Need new expect for call signal */ + if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + return -1; + nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); + exp->flags = NF_CT_EXPECT_PERMANENT; + exp->helper = nf_conntrack_helper_q931; + + if (nf_conntrack_expect_related(exp) == 0) { + DEBUGP("nf_ct_ras: expect Q.931 "); + NF_CT_DUMP_TUPLE(&exp->tuple); + } else + ret = -1; + + nf_conntrack_expect_put(exp); + + /* Ignore rasAddress */ + + return ret; +} + +/****************************************************************************/ +static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, InfoRequestResponse *irr) +{ + int ret; + typeof(set_ras_addr_hook) set_ras_addr; + typeof(set_sig_addr_hook) set_sig_addr; + + DEBUGP("nf_ct_ras: IRR\n"); + + set_ras_addr = rcu_dereference(set_ras_addr_hook); + if (set_ras_addr && ct->status & IPS_NAT_MASK) { + ret = set_ras_addr(pskb, ct, ctinfo, data, + &irr->rasAddress, 1); + if (ret < 0) + return -1; + } + + set_sig_addr = rcu_dereference(set_sig_addr_hook); + if (set_sig_addr && ct->status & IPS_NAT_MASK) { + ret = set_sig_addr(pskb, ct, ctinfo, data, + irr->callSignalAddress.item, + irr->callSignalAddress.count); + if (ret < 0) + return -1; + } + + return 0; +} + +/****************************************************************************/ +static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, RasMessage *ras) +{ + switch (ras->choice) { + case eRasMessage_gatekeeperRequest: + return process_grq(pskb, ct, ctinfo, data, + &ras->gatekeeperRequest); + case eRasMessage_gatekeeperConfirm: + return process_gcf(pskb, ct, ctinfo, data, + &ras->gatekeeperConfirm); + case eRasMessage_registrationRequest: + return process_rrq(pskb, ct, ctinfo, data, + &ras->registrationRequest); + case eRasMessage_registrationConfirm: + return process_rcf(pskb, ct, ctinfo, data, + &ras->registrationConfirm); + case eRasMessage_unregistrationRequest: + return process_urq(pskb, ct, ctinfo, data, + &ras->unregistrationRequest); + case eRasMessage_admissionRequest: + return process_arq(pskb, ct, ctinfo, data, + &ras->admissionRequest); + case eRasMessage_admissionConfirm: + return process_acf(pskb, ct, ctinfo, data, + &ras->admissionConfirm); + case eRasMessage_locationRequest: + return process_lrq(pskb, ct, ctinfo, data, + &ras->locationRequest); + case eRasMessage_locationConfirm: + return process_lcf(pskb, ct, ctinfo, data, + &ras->locationConfirm); + case eRasMessage_infoRequestResponse: + return process_irr(pskb, ct, ctinfo, data, + &ras->infoRequestResponse); + default: + DEBUGP("nf_ct_ras: RAS message %d\n", ras->choice); + break; + } + + return 0; +} + +/****************************************************************************/ +static int ras_help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + static RasMessage ras; + unsigned char *data; + int datalen = 0; + int ret; + + DEBUGP("nf_ct_ras: skblen = %u\n", (*pskb)->len); + + spin_lock_bh(&nf_h323_lock); + + /* Get UDP data */ + data = get_udp_data(pskb, protoff, &datalen); + if (data == NULL) + goto accept; + DEBUGP("nf_ct_ras: RAS message len=%d ", datalen); + NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); + + /* Decode RAS message */ + ret = DecodeRasMessage(data, datalen, &ras); + if (ret < 0) { + if (net_ratelimit()) + printk("nf_ct_ras: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); + goto accept; + } + + /* Process RAS message */ + if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) + goto drop; + + accept: + spin_unlock_bh(&nf_h323_lock); + return NF_ACCEPT; + + drop: + spin_unlock_bh(&nf_h323_lock); + if (net_ratelimit()) + printk("nf_ct_ras: packet dropped\n"); + return NF_DROP; +} + +/****************************************************************************/ +static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { + { + .name = "RAS", + .me = THIS_MODULE, + .max_expected = 32, + .timeout = 240, + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = __constant_htons(RAS_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .help = ras_help, + }, + { + .name = "RAS", + .me = THIS_MODULE, + .max_expected = 32, + .timeout = 240, + .tuple.src.l3num = AF_INET6, + .tuple.src.u.udp.port = __constant_htons(RAS_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .help = ras_help, + }, +}; + +/****************************************************************************/ +static void __exit nf_conntrack_h323_fini(void) +{ + nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); + kfree(h323_buffer); + DEBUGP("nf_ct_h323: fini\n"); +} + +/****************************************************************************/ +static int __init nf_conntrack_h323_init(void) +{ + int ret; + + h323_buffer = kmalloc(65536, GFP_KERNEL); + if (!h323_buffer) + return -ENOMEM; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); + if (ret < 0) + goto err1; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); + if (ret < 0) + goto err2; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); + if (ret < 0) + goto err3; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + if (ret < 0) + goto err4; + DEBUGP("nf_ct_h323: init success\n"); + return 0; + +err4: + nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); +err3: + nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); +err2: + nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); +err1: + return ret; +} + +/****************************************************************************/ +module_init(nf_conntrack_h323_init); +module_exit(nf_conntrack_h323_fini); + +EXPORT_SYMBOL_GPL(get_h225_addr); +EXPORT_SYMBOL_GPL(set_h245_addr_hook); +EXPORT_SYMBOL_GPL(set_h225_addr_hook); +EXPORT_SYMBOL_GPL(set_sig_addr_hook); +EXPORT_SYMBOL_GPL(set_ras_addr_hook); +EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook); +EXPORT_SYMBOL_GPL(nat_t120_hook); +EXPORT_SYMBOL_GPL(nat_h245_hook); +EXPORT_SYMBOL_GPL(nat_callforwarding_hook); +EXPORT_SYMBOL_GPL(nat_q931_hook); + +MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); +MODULE_DESCRIPTION("H.323 connection tracking helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_conntrack_h323"); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c index 4b359618bed..4c6f8b3b120 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c +++ b/net/netfilter/nf_conntrack_h323_types.c @@ -36,7 +36,8 @@ static field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */ }; static field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */ - {FNAME("ip") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, + {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE, + offsetof(TransportAddress_ip6Address, ip6), NULL}, {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL}, }; @@ -65,8 +66,8 @@ static field_t _TransportAddress[] = { /* CHOICE */ _TransportAddress_ipSourceRoute}, {FNAME("ipxAddress") SEQ, 0, 3, 3, SKIP, 0, _TransportAddress_ipxAddress}, - {FNAME("ip6Address") SEQ, 0, 2, 2, SKIP | EXT, 0, - _TransportAddress_ip6Address}, + {FNAME("ip6Address") SEQ, 0, 2, 2, DECODE | EXT, + offsetof(TransportAddress, ip6Address), _TransportAddress_ip6Address}, {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL}, {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c new file mode 100644 index 00000000000..0743be4434b --- /dev/null +++ b/net/netfilter/nf_conntrack_helper.c @@ -0,0 +1,155 @@ +/* Helper handling for netfilter. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <linux/stddef.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_core.h> + +static __read_mostly LIST_HEAD(helpers); + +struct nf_conntrack_helper * +__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; +} + +struct nf_conntrack_helper * +nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + + /* need nf_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&nf_conntrack_lock); + + helper = __nf_ct_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&nf_conntrack_lock); + + return helper; +} +EXPORT_SYMBOL_GPL(nf_ct_helper_find_get); + +void nf_ct_helper_put(struct nf_conntrack_helper *helper) +{ + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_put); + +struct nf_conntrack_helper * +__nf_conntrack_helper_find_byname(const char *name) +{ + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); + +static inline int unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); + struct nf_conn_help *help = nfct_help(ct); + + if (help && help->helper == me) { + nf_conntrack_event(IPCT_HELPER, ct); + help->helper = NULL; + } + return 0; +} + +int nf_conntrack_helper_register(struct nf_conntrack_helper *me) +{ + int size, ret; + + BUG_ON(me->timeout == 0); + + size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_help)) + + sizeof(struct nf_conn_help); + ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", + size); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n"); + return ret; + } + write_lock_bh(&nf_conntrack_lock); + list_add(&me->list, &helpers); + write_unlock_bh(&nf_conntrack_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + unsigned int i; + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_expect *exp, *tmp; + + /* Need write lock here, to delete helper. */ + write_lock_bh(&nf_conntrack_lock); + list_del(&me->list); + + /* Get rid of expectations */ + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { + struct nf_conn_help *help = nfct_help(exp->master); + if ((help->helper == me || exp->helper == me) && + del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + + /* Get rid of expecteds, set helpers to NULL. */ + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < nf_conntrack_htable_size; i++) { + list_for_each_entry(h, &nf_conntrack_hash[i], list) + unhelp(h, me); + } + write_unlock_bh(&nf_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + synchronize_net(); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c new file mode 100644 index 00000000000..ed01db63439 --- /dev/null +++ b/net/netfilter/nf_conntrack_irc.c @@ -0,0 +1,281 @@ +/* IRC extension for IP connection tracking, Version 1.21 + * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> + * based on RR's ip_conntrack_ftp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_irc.h> + +#define MAX_PORTS 8 +static unsigned short ports[MAX_PORTS]; +static int ports_c; +static unsigned int max_dcc_channels = 8; +static unsigned int dcc_timeout __read_mostly = 300; +/* This is slow, but it's simple. --RR */ +static char *irc_buffer; +static DEFINE_SPINLOCK(irc_buffer_lock); + +unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_irc_hook); + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_conntrack_irc"); + +module_param_array(ports, ushort, &ports_c, 0400); +MODULE_PARM_DESC(ports, "port numbers of IRC servers"); +module_param(max_dcc_channels, uint, 0400); +MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per " + "IRC session"); +module_param(dcc_timeout, uint, 0400); +MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); + +static const char *dccprotos[] = { + "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " +}; + +#define MINMATCHLEN 5 + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) +#else +#define DEBUGP(format, args...) +#endif + +/* tries to get the ip_addr and port out of a dcc command + * return value: -1 on failure, 0 on success + * data pointer to first byte of DCC command data + * data_end pointer to last byte of dcc command data + * ip returns parsed ip of dcc command + * port returns parsed port of dcc command + * ad_beg_p returns pointer to first byte of addr data + * ad_end_p returns pointer to last byte of addr data + */ +static int parse_dcc(char *data, char *data_end, u_int32_t *ip, + u_int16_t *port, char **ad_beg_p, char **ad_end_p) +{ + /* at least 12: "AAAAAAAA P\1\n" */ + while (*data++ != ' ') + if (data > data_end - 12) + return -1; + + *ad_beg_p = data; + *ip = simple_strtoul(data, &data, 10); + + /* skip blanks between ip and port */ + while (*data == ' ') { + if (data >= data_end) + return -1; + data++; + } + + *port = simple_strtoul(data, &data, 10); + *ad_end_p = data; + + return 0; +} + +static int help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff; + struct tcphdr _tcph, *th; + char *data, *data_limit, *ib_ptr; + int dir = CTINFO2DIR(ctinfo); + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple *tuple; + u_int32_t dcc_ip; + u_int16_t dcc_port; + __be16 port; + int i, ret = NF_ACCEPT; + char *addr_beg_p, *addr_end_p; + typeof(nf_nat_irc_hook) nf_nat_irc; + + /* If packet is coming from IRC server */ + if (dir == IP_CT_DIR_REPLY) + return NF_ACCEPT; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + return NF_ACCEPT; + + /* Not a full tcp header? */ + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + + /* No data? */ + dataoff = protoff + th->doff*4; + if (dataoff >= (*pskb)->len) + return NF_ACCEPT; + + spin_lock_bh(&irc_buffer_lock); + ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff, + irc_buffer); + BUG_ON(ib_ptr == NULL); + + data = ib_ptr; + data_limit = ib_ptr + (*pskb)->len - dataoff; + + /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 + * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ + while (data < data_limit - (19 + MINMATCHLEN)) { + if (memcmp(data, "\1DCC ", 5)) { + data++; + continue; + } + data += 5; + /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ + + DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); + + for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { + if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { + /* no match */ + continue; + } + data += strlen(dccprotos[i]); + DEBUGP("DCC %s detected\n", dccprotos[i]); + + /* we have at least + * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid + * data left (== 14/13 bytes) */ + if (parse_dcc((char *)data, data_limit, &dcc_ip, + &dcc_port, &addr_beg_p, &addr_end_p)) { + DEBUGP("unable to parse dcc command\n"); + continue; + } + DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n", + HIPQUAD(dcc_ip), dcc_port); + + /* dcc_ip can be the internal OR external (NAT'ed) IP */ + tuple = &ct->tuplehash[dir].tuple; + if (tuple->src.u3.ip != htonl(dcc_ip) && + tuple->dst.u3.ip != htonl(dcc_ip)) { + if (net_ratelimit()) + printk(KERN_WARNING + "Forged DCC command from " + "%u.%u.%u.%u: %u.%u.%u.%u:%u\n", + NIPQUAD(tuple->src.u3.ip), + HIPQUAD(dcc_ip), dcc_port); + continue; + } + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + tuple = &ct->tuplehash[!dir].tuple; + port = htons(dcc_port); + nf_conntrack_expect_init(exp, tuple->src.l3num, + NULL, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); + + nf_nat_irc = rcu_dereference(nf_nat_irc_hook); + if (nf_nat_irc && ct->status & IPS_NAT_MASK) + ret = nf_nat_irc(pskb, ctinfo, + addr_beg_p - ib_ptr, + addr_end_p - addr_beg_p, + exp); + else if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + nf_conntrack_expect_put(exp); + goto out; + } + } + out: + spin_unlock_bh(&irc_buffer_lock); + return ret; +} + +static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; +static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly; + +static void nf_conntrack_irc_fini(void); + +static int __init nf_conntrack_irc_init(void) +{ + int i, ret; + char *tmpname; + + if (max_dcc_channels < 1) { + printk("nf_ct_irc: max_dcc_channels must not be zero\n"); + return -EINVAL; + } + + irc_buffer = kmalloc(65536, GFP_KERNEL); + if (!irc_buffer) + return -ENOMEM; + + /* If no port given, default to standard irc port */ + if (ports_c == 0) + ports[ports_c++] = IRC_PORT; + + for (i = 0; i < ports_c; i++) { + irc[i].tuple.src.l3num = AF_INET; + irc[i].tuple.src.u.tcp.port = htons(ports[i]); + irc[i].tuple.dst.protonum = IPPROTO_TCP; + irc[i].mask.src.l3num = 0xFFFF; + irc[i].mask.src.u.tcp.port = htons(0xFFFF); + irc[i].mask.dst.protonum = 0xFF; + irc[i].max_expected = max_dcc_channels; + irc[i].timeout = dcc_timeout; + irc[i].me = THIS_MODULE; + irc[i].help = help; + + tmpname = &irc_names[i][0]; + if (ports[i] == IRC_PORT) + sprintf(tmpname, "irc"); + else + sprintf(tmpname, "irc-%u", i); + irc[i].name = tmpname; + + ret = nf_conntrack_helper_register(&irc[i]); + if (ret) { + printk("nf_ct_irc: failed to register helper " + "for pf: %u port: %u\n", + irc[i].tuple.src.l3num, ports[i]); + nf_conntrack_irc_fini(); + return ret; + } + } + return 0; +} + +/* This function is intentionally _NOT_ defined as __exit, because + * it is needed by the init function */ +static void nf_conntrack_irc_fini(void) +{ + int i; + + for (i = 0; i < ports_c; i++) + nf_conntrack_helper_unregister(&irc[i]); + kfree(irc_buffer); +} + +module_init(nf_conntrack_irc_init); +module_exit(nf_conntrack_irc_fini); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 21e0bc91cf2..a3d31c3ac8e 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -26,7 +26,7 @@ #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> @@ -37,8 +37,6 @@ #define DEBUGP(format, args...) #endif -DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); - static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -84,7 +82,7 @@ static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) return NF_CT_F_BASIC; } -struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { .l3proto = PF_UNSPEC, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, @@ -94,3 +92,4 @@ struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { .prepare = generic_prepare, .get_features = generic_get_features, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c new file mode 100644 index 00000000000..a5b234e444d --- /dev/null +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -0,0 +1,126 @@ +/* + * NetBIOS name service broadcast connection tracking helper + * + * (c) 2005 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +/* + * This helper tracks locally originating NetBIOS name service + * requests by issuing permanent expectations (valid until + * timing out) matching all reply connections from the + * destination network. The only NetBIOS specific thing is + * actually the port number. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/if_addr.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <net/route.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> + +#define NMBD_PORT 137 + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_conntrack_netbios_ns"); + +static unsigned int timeout __read_mostly = 3; +module_param(timeout, uint, 0400); +MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); + +static int help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + struct nf_conntrack_expect *exp; + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct in_device *in_dev; + __be32 mask = 0; + + /* we're only interested in locally generated packets */ + if ((*pskb)->sk == NULL) + goto out; + if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) + goto out; + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + goto out; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); + if (in_dev != NULL) { + for_primary_ifa(in_dev) { + if (ifa->ifa_broadcast == iph->daddr) { + mask = ifa->ifa_mask; + break; + } + } endfor_ifa(in_dev); + } + rcu_read_unlock(); + + if (mask == 0) + goto out; + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) + goto out; + + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + exp->tuple.src.u.udp.port = htons(NMBD_PORT); + + exp->mask.src.u3.ip = mask; + exp->mask.src.u.udp.port = htons(0xFFFF); + exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); + exp->mask.dst.protonum = 0xFF; + + exp->expectfn = NULL; + exp->flags = NF_CT_EXPECT_PERMANENT; + + nf_conntrack_expect_related(exp); + nf_conntrack_expect_put(exp); + + nf_ct_refresh(ct, *pskb, timeout * HZ); +out: + return NF_ACCEPT; +} + +static struct nf_conntrack_helper helper __read_mostly = { + .name = "netbios-ns", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .mask.src.l3num = 0xFFFF, + .mask.src.u.udp.port = __constant_htons(0xFFFF), + .mask.dst.protonum = 0xFF, + .max_expected = 1, + .me = THIS_MODULE, + .help = help, +}; + +static int __init nf_conntrack_netbios_ns_init(void) +{ + helper.timeout = timeout; + return nf_conntrack_helper_register(&helper); +} + +static void __exit nf_conntrack_netbios_ns_fini(void) +{ + nf_conntrack_helper_unregister(&helper); +} + +module_init(nf_conntrack_netbios_ns_init); +module_exit(nf_conntrack_netbios_ns_fini); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bd0156a28ec..bd1d2de75e4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -35,10 +35,15 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#ifdef CONFIG_NF_NAT_NEEDED +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_protocol.h> +#endif #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -50,15 +55,15 @@ static char __initdata version[] = "0.93"; static inline int ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_protocol *proto) + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); - if (likely(proto->tuple_to_nfattr)) - ret = proto->tuple_to_nfattr(skb, tuple); + if (likely(l4proto->tuple_to_nfattr)) + ret = l4proto->tuple_to_nfattr(skb, tuple); NFA_NEST_END(skb, nest_parms); @@ -93,7 +98,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, { int ret; struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto); @@ -102,9 +107,9 @@ ctnetlink_dump_tuples(struct sk_buff *skb, if (unlikely(ret < 0)) return ret; - proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, tuple, proto); - nf_ct_proto_put(proto); + l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto); + nf_ct_l4proto_put(l4proto); return ret; } @@ -112,7 +117,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { - u_int32_t status = htonl((u_int32_t) ct->status); + __be32 status = htonl((u_int32_t) ct->status); NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; @@ -124,7 +129,7 @@ static inline int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { long timeout_l = ct->timeout.expires - jiffies; - u_int32_t timeout; + __be32 timeout; if (timeout_l < 0) timeout = 0; @@ -141,26 +146,27 @@ nfattr_failure: static inline int ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) { - struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + struct nf_conntrack_l4proto *l4proto = nf_ct_l4proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); struct nfattr *nest_proto; int ret; - if (!proto->to_nfattr) { - nf_ct_proto_put(proto); + if (!l4proto->to_nfattr) { + nf_ct_l4proto_put(l4proto); return 0; } nest_proto = NFA_NEST(skb, CTA_PROTOINFO); - ret = proto->to_nfattr(skb, nest_proto, ct); + ret = l4proto->to_nfattr(skb, nest_proto, ct); - nf_ct_proto_put(proto); + nf_ct_l4proto_put(l4proto); NFA_NEST_END(skb, nest_proto); return ret; nfattr_failure: + nf_ct_l4proto_put(l4proto); return -1; } @@ -194,7 +200,7 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nfattr *nest_count = NFA_NEST(skb, type); - u_int32_t tmp; + __be32 tmp; tmp = htonl(ct->counters[dir].packets); NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); @@ -217,7 +223,7 @@ nfattr_failure: static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { - u_int32_t mark = htonl(ct->mark); + __be32 mark = htonl(ct->mark); NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); return 0; @@ -232,7 +238,7 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - u_int32_t id = htonl(ct->id); + __be32 id = htonl(ct->id); NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); return 0; @@ -243,7 +249,7 @@ nfattr_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { - u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + __be32 use = htonl(atomic_read(&ct->ct_general.use)); NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); return 0; @@ -329,8 +335,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, } else if (events & (IPCT_NEW | IPCT_RELATED)) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; - /* dump everything */ - events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; @@ -365,28 +369,35 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nfattr_failure; NFA_NEST_END(skb, nest_parms); - - /* NAT stuff is now a status flag */ - if ((events & IPCT_STATUS || events & IPCT_NATINFO) - && ctnetlink_dump_status(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_REFRESH - && ctnetlink_dump_timeout(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_PROTOINFO - && ctnetlink_dump_protoinfo(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_HELPINFO - && ctnetlink_dump_helpinfo(skb, ct) < 0) - goto nfattr_failure; - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) - goto nfattr_failure; + if (events & IPCT_DESTROY) { + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + } else { + if (ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; - if (events & IPCT_MARK - && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; + if (ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + + if ((events & IPCT_HELPER || nfct_help(ct)) + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if ((events & IPCT_MARK || ct->mark) + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + + if (events & IPCT_COUNTER_FILLING && + (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) + goto nfattr_failure; + } nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); @@ -423,7 +434,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) restart: list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { h = (struct nf_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); /* Dump entries of a given L3 protocol number. @@ -491,7 +502,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, struct nf_conntrack_tuple *tuple) { struct nfattr *tb[CTA_PROTO_MAX]; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; int ret = 0; nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); @@ -503,12 +514,12 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, return -EINVAL; tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); - proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); + l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); - if (likely(proto->nfattr_to_tuple)) - ret = proto->nfattr_to_tuple(tb, tuple); + if (likely(l4proto->nfattr_to_tuple)) + ret = l4proto->nfattr_to_tuple(tb, tuple); - nf_ct_proto_put(proto); + nf_ct_l4proto_put(l4proto); return ret; } @@ -549,28 +560,28 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, return 0; } -#ifdef CONFIG_IP_NF_NAT_NEEDED +#ifdef CONFIG_NF_NAT_NEEDED static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), }; -static int ctnetlink_parse_nat_proto(struct nfattr *attr, +static int nfnetlink_parse_nat_proto(struct nfattr *attr, const struct nf_conn *ct, - struct ip_nat_range *range) + struct nf_nat_range *range) { struct nfattr *tb[CTA_PROTONAT_MAX]; - struct ip_nat_protocol *npt; + struct nf_nat_protocol *npt; nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) return -EINVAL; - npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + npt = nf_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); if (!npt->nfattr_to_range) { - ip_nat_proto_put(npt); + nf_nat_proto_put(npt); return 0; } @@ -578,7 +589,7 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, if (npt->nfattr_to_range(tb, range) > 0) range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - ip_nat_proto_put(npt); + nf_nat_proto_put(npt); return 0; } @@ -589,8 +600,8 @@ static const size_t cta_min_nat[CTA_NAT_MAX] = { }; static inline int -ctnetlink_parse_nat(struct nfattr *nat, - const struct nf_conn *ct, struct ip_nat_range *range) +nfnetlink_parse_nat(struct nfattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) { struct nfattr *tb[CTA_NAT_MAX]; int err; @@ -603,12 +614,12 @@ ctnetlink_parse_nat(struct nfattr *nat, return -EINVAL; if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); if (!tb[CTA_NAT_MAXIP-1]) range->max_ip = range->min_ip; else - range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; @@ -616,7 +627,7 @@ ctnetlink_parse_nat(struct nfattr *nat, if (!tb[CTA_NAT_PROTO-1]) return 0; - err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); if (err < 0) return err; @@ -681,7 +692,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); if (ct->id != id) { nf_ct_put(ct); return -ENOENT; @@ -751,7 +762,6 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, nf_ct_put(ct); return -ENOMEM; } - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, 1, ct); @@ -775,7 +785,7 @@ static inline int ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) { unsigned long d; - unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned int status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -792,35 +802,35 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) return -EINVAL; if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { -#ifndef CONFIG_IP_NF_NAT_NEEDED +#ifndef CONFIG_NF_NAT_NEEDED return -EINVAL; #else - struct ip_nat_range range; + struct nf_nat_range range; if (cda[CTA_NAT_DST-1]) { - if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, + if (nfnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, &range) < 0) return -EINVAL; - if (ip_nat_initialized(ct, + if (nf_nat_initialized(ct, HOOK2MANIP(NF_IP_PRE_ROUTING))) return -EEXIST; - ip_nat_setup_info(ct, &range, hooknum); + nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } if (cda[CTA_NAT_SRC-1]) { - if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, + if (nfnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, &range) < 0) return -EINVAL; - if (ip_nat_initialized(ct, + if (nf_nat_initialized(ct, HOOK2MANIP(NF_IP_POST_ROUTING))) return -EEXIST; - ip_nat_setup_info(ct, &range, hooknum); + nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); } #endif } /* Be careful here, modifying NAT bits can screw up things, * so don't let users modify them directly if they don't pass - * ip_nat_range. */ + * nf_nat_range. */ ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); return 0; } @@ -874,7 +884,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) static inline int ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) { - u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -889,18 +899,18 @@ static inline int ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[]) { struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; int err = 0; nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); - proto = nf_ct_proto_find_get(l3num, npt); + l4proto = nf_ct_l4proto_find_get(l3num, npt); - if (proto->from_nfattr) - err = proto->from_nfattr(tb, ct); - nf_ct_proto_put(proto); + if (l4proto->from_nfattr) + err = l4proto->from_nfattr(tb, ct); + nf_ct_l4proto_put(l4proto); return err; } @@ -936,7 +946,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[]) #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif return 0; @@ -949,6 +959,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], { struct nf_conn *ct; int err = -EINVAL; + struct nf_conn_help *help; ct = nf_conntrack_alloc(otuple, rtuple); if (ct == NULL || IS_ERR(ct)) @@ -956,14 +967,16 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (!cda[CTA_TIMEOUT-1]) goto err; - ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; - err = ctnetlink_change_status(ct, cda); - if (err < 0) - goto err; + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + } if (cda[CTA_PROTOINFO-1]) { err = ctnetlink_change_protoinfo(ct, cda); @@ -973,12 +986,19 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif + help = nfct_help(ct); + if (help) + help->helper = nf_ct_helper_find_get(rtuple); + add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); + if (help && help->helper) + nf_ct_helper_put(help->helper); + return 0; err: @@ -1072,7 +1092,7 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, { int ret; struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); @@ -1082,9 +1102,9 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, if (unlikely(ret < 0)) goto nfattr_failure; - proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, mask, proto); - nf_ct_proto_put(proto); + l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + ret = ctnetlink_dump_tuples_proto(skb, mask, l4proto); + nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) goto nfattr_failure; @@ -1101,8 +1121,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) { struct nf_conn *master = exp->master; - u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); - u_int32_t id = htonl(exp->id); + __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); + __be32 id = htonl(exp->id); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; @@ -1275,12 +1295,12 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_conntrack_expect_find(&tuple); + exp = nf_conntrack_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { nf_conntrack_expect_put(exp); return -ENOENT; @@ -1291,8 +1311,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) goto out; - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; - + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1, exp); @@ -1331,13 +1350,12 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_conntrack_expect_find(&tuple); + exp = nf_conntrack_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = - *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { nf_conntrack_expect_put(exp); return -ENOENT; @@ -1433,6 +1451,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) exp->expectfn = NULL; exp->flags = 0; exp->master = ct; + exp->helper = NULL; memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); @@ -1529,6 +1548,7 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = { .cb = ctnl_exp_cb, }; +MODULE_ALIAS("ip_conntrack_netlink"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c new file mode 100644 index 00000000000..f0ff00e0d05 --- /dev/null +++ b/net/netfilter/nf_conntrack_pptp.c @@ -0,0 +1,607 @@ +/* + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFFC2673 + * - We can only support one single call within each session + * TODO: + * - testing of incoming PPTP calls + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/tcp.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_pptp.h> + +#define NF_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); +MODULE_ALIAS("ip_conntrack_pptp"); + +static DEFINE_SPINLOCK(nf_pptp_lock); + +int +(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); + +int +(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound); + +void +(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig, + struct nf_conntrack_expect *expect_reply) + __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre); + +void +(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct, + struct nf_conntrack_expect *exp) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + rcu_read_lock(); + nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); + if (nf_nat_pptp_expectfn && ct->status & IPS_NAT_MASK) + nf_nat_pptp_expectfn(ct, exp); + else { + struct nf_conntrack_tuple inv_t; + struct nf_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + nf_ct_invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + NF_CT_DUMP_TUPLE(&inv_t); + + exp_other = nf_conntrack_expect_find_get(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + nf_conntrack_unexpect_related(exp_other); + nf_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } + rcu_read_unlock(); +} + +static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_expect *exp; + struct nf_conn *sibling; + + DEBUGP("trying to timeout ct or exp for tuple "); + NF_CT_DUMP_TUPLE(t); + + h = nf_conntrack_find_get(t, NULL); + if (h) { + sibling = nf_ct_tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + nf_ct_put(sibling); + return 1; + } else { + exp = nf_conntrack_expect_find_get(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + nf_conntrack_unexpect_related(exp); + nf_conntrack_expect_put(exp); + return 1; + } + } + return 0; +} + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct nf_conn *ct) +{ + struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_tuple t; + + nf_ct_gre_keymap_destroy(ct); + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; + t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) +{ + struct nf_conntrack_expect *exp_orig, *exp_reply; + enum ip_conntrack_dir dir; + int ret = 1; + typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre; + + exp_orig = nf_conntrack_expect_alloc(ct); + if (exp_orig == NULL) + goto out; + + exp_reply = nf_conntrack_expect_alloc(ct); + if (exp_reply == NULL) + goto out_put_orig; + + /* original direction, PNS->PAC */ + dir = IP_CT_DIR_ORIGINAL; + nf_conntrack_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &peer_callid, &callid); + exp_orig->expectfn = pptp_expectfn; + + /* reply direction, PAC->PNS */ + dir = IP_CT_DIR_REPLY; + nf_conntrack_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &callid, &peer_callid); + exp_reply->expectfn = pptp_expectfn; + + nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre); + if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK) + nf_nat_pptp_exp_gre(exp_orig, exp_reply); + if (nf_conntrack_expect_related(exp_orig) != 0) + goto out_put_both; + if (nf_conntrack_expect_related(exp_reply) != 0) + goto out_unexpect_orig; + + /* Add GRE keymap entries */ + if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0) + goto out_unexpect_both; + if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) { + nf_ct_gre_keymap_destroy(ct); + goto out_unexpect_both; + } + ret = 0; + +out_put_both: + nf_conntrack_expect_put(exp_reply); +out_put_orig: + nf_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + nf_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + nf_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + u_int16_t msg; + __be16 cid = 0, pcid = 0; + typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) + goto invalid; + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) + goto invalid; + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) + goto invalid; + + cid = pptpReq->ocack.callID; + pcid = pptpReq->ocack.peersCallID; + if (info->pns_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_OUT_CONF; + info->pac_call_id = cid; + exp_gre(ct, cid, pcid); + } else + info->cstate = PPTP_CALL_NONE; + break; + + case PPTP_IN_CALL_REQUEST: + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + + cid = pptpReq->icreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = cid; + break; + + case PPTP_IN_CALL_CONNECT: + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + if (info->cstate != PPTP_CALL_IN_REP && + info->cstate != PPTP_CALL_IN_CONF) + goto invalid; + + pcid = pptpReq->iccon.peersCallID; + cid = info->pac_call_id; + + if (info->pns_call_id != pcid) + goto invalid; + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + exp_gre(ct, cid, pcid); + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + /* server confirms disconnect */ + cid = pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + + default: + goto invalid; + } + + nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); + if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) + return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq); + return NF_ACCEPT; + +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + return NF_ACCEPT; +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + u_int16_t msg; + __be16 cid = 0, pcid = 0; + typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) + goto invalid; + info->sstate = PPTP_SESSION_REQUESTED; + break; + + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->pns_call_id = cid; + break; + + case PPTP_IN_CALL_REPLY: + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ && + info->cstate != PPTP_CALL_IN_REP) + goto invalid; + + cid = pptpReq->icack.callID; + pcid = pptpReq->icack.peersCallID; + if (info->pac_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = cid; + } else + info->cstate = PPTP_CALL_NONE; + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + + case PPTP_SET_LINK_INFO: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + + default: + goto invalid; + } + + nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); + if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) + return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return NF_ACCEPT; + +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + return NF_ACCEPT; +} + +static const unsigned int pptp_msg_size[] = { + [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), + [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), + [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), + [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), + [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), + [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), + [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), + [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), + [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), + [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), + [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), + [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), + [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), +}; + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) + +{ + int dir = CTINFO2DIR(ctinfo); + struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct tcphdr _tcph, *tcph; + struct pptp_pkt_hdr _pptph, *pptph; + struct PptpControlHeader _ctlh, *ctlh; + union pptp_ctrl_union _pptpReq, *pptpReq; + unsigned int tcplen = (*pskb)->len - protoff; + unsigned int datalen, reqlen, nexthdr_off; + int oldsstate, oldcstate; + int ret; + u_int16_t msg; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + return NF_ACCEPT; + + nexthdr_off = protoff; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + msg = ntohs(ctlh->messageType); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&nf_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&nf_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct nf_conntrack_helper pptp __read_mostly = { + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple.src.l3num = AF_INET, + .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), + .tuple.dst.protonum = IPPROTO_TCP, + .mask.src.l3num = 0xffff, + .mask.src.u.tcp.port = __constant_htons(0xffff), + .mask.dst.protonum = 0xff, + .help = conntrack_pptp_help, + .destroy = pptp_destroy_siblings, +}; + +static int __init nf_conntrack_pptp_init(void) +{ + return nf_conntrack_helper_register(&pptp); +} + +static void __exit nf_conntrack_pptp_fini(void) +{ + nf_conntrack_helper_unregister(&pptp); + nf_ct_gre_keymap_flush(); +} + +module_init(nf_conntrack_pptp_init); +module_exit(nf_conntrack_pptp_fini); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c new file mode 100644 index 00000000000..1a61b72712c --- /dev/null +++ b/net/netfilter/nf_conntrack_proto.c @@ -0,0 +1,410 @@ +/* L3/L4 protocol support for nf_conntrack. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/percpu.h> +#include <linux/moduleparam.h> +#include <linux/notifier.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_core.h> + +struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_l3protos); + +#ifdef CONFIG_SYSCTL +static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex); + +static int +nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path, + struct ctl_table *table, unsigned int *users) +{ + if (*header == NULL) { + *header = nf_register_sysctl_table(path, table); + if (*header == NULL) + return -ENOMEM; + } + if (users != NULL) + (*users)++; + return 0; +} + +static void +nf_ct_unregister_sysctl(struct ctl_table_header **header, + struct ctl_table *table, unsigned int *users) +{ + if (users != NULL && --*users > 0) + return; + nf_unregister_sysctl_table(*header, table); + *header = NULL; +} +#endif + +struct nf_conntrack_l4proto * +__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) +{ + if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) + return &nf_conntrack_l4proto_generic; + + return nf_ct_protos[l3proto][l4proto]; +} +EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); + +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct nf_conntrack_l4proto * +nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto) +{ + struct nf_conntrack_l4proto *p; + + preempt_disable(); + p = __nf_ct_l4proto_find(l3proto, l4proto); + if (!try_module_get(p->me)) + p = &nf_conntrack_l4proto_generic; + preempt_enable(); + + return p; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); + +void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); + +struct nf_conntrack_l3proto * +nf_ct_l3proto_find_get(u_int16_t l3proto) +{ + struct nf_conntrack_l3proto *p; + + preempt_disable(); + p = __nf_ct_l3proto_find(l3proto); + if (!try_module_get(p->me)) + p = &nf_conntrack_l3proto_generic; + preempt_enable(); + + return p; +} +EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); + +void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_ct_l3proto_put); + +int +nf_ct_l3proto_try_module_get(unsigned short l3proto) +{ + int ret; + struct nf_conntrack_l3proto *p; + +retry: p = nf_ct_l3proto_find_get(l3proto); + if (p == &nf_conntrack_l3proto_generic) { + ret = request_module("nf_conntrack-%d", l3proto); + if (!ret) + goto retry; + + return -EPROTOTYPE; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get); + +void nf_ct_l3proto_module_put(unsigned short l3proto) +{ + struct nf_conntrack_l3proto *p; + + preempt_disable(); + p = __nf_ct_l3proto_find(l3proto); + preempt_enable(); + + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); + +static int kill_l3proto(struct nf_conn *i, void *data) +{ + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == + ((struct nf_conntrack_l3proto *)data)->l3proto); +} + +static int kill_l4proto(struct nf_conn *i, void *data) +{ + struct nf_conntrack_l4proto *l4proto; + l4proto = (struct nf_conntrack_l4proto *)data; + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == + l4proto->l4proto) && + (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == + l4proto->l3proto); +} + +static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto) +{ + int err = 0; + +#ifdef CONFIG_SYSCTL + mutex_lock(&nf_ct_proto_sysctl_mutex); + if (l3proto->ctl_table != NULL) { + err = nf_ct_register_sysctl(&l3proto->ctl_table_header, + l3proto->ctl_table_path, + l3proto->ctl_table, NULL); + } + mutex_unlock(&nf_ct_proto_sysctl_mutex); +#endif + return err; +} + +static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto) +{ +#ifdef CONFIG_SYSCTL + mutex_lock(&nf_ct_proto_sysctl_mutex); + if (l3proto->ctl_table_header != NULL) + nf_ct_unregister_sysctl(&l3proto->ctl_table_header, + l3proto->ctl_table, NULL); + mutex_unlock(&nf_ct_proto_sysctl_mutex); +#endif +} + +int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) +{ + int ret = 0; + + if (proto->l3proto >= AF_MAX) { + ret = -EBUSY; + goto out; + } + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { + ret = -EBUSY; + goto out_unlock; + } + nf_ct_l3protos[proto->l3proto] = proto; + write_unlock_bh(&nf_conntrack_lock); + + ret = nf_ct_l3proto_register_sysctl(proto); + if (ret < 0) + nf_conntrack_l3proto_unregister(proto); + return ret; + +out_unlock: + write_unlock_bh(&nf_conntrack_lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); + +int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) +{ + int ret = 0; + + if (proto->l3proto >= AF_MAX) { + ret = -EBUSY; + goto out; + } + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_l3protos[proto->l3proto] != proto) { + write_unlock_bh(&nf_conntrack_lock); + ret = -EBUSY; + goto out; + } + + nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic; + write_unlock_bh(&nf_conntrack_lock); + + nf_ct_l3proto_unregister_sysctl(proto); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_cleanup(kill_l3proto, proto); + +out: + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); + +static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto) +{ + int err = 0; + +#ifdef CONFIG_SYSCTL + mutex_lock(&nf_ct_proto_sysctl_mutex); + if (l4proto->ctl_table != NULL) { + err = nf_ct_register_sysctl(l4proto->ctl_table_header, + nf_net_netfilter_sysctl_path, + l4proto->ctl_table, + l4proto->ctl_table_users); + if (err < 0) + goto out; + } +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (l4proto->ctl_compat_table != NULL) { + err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header, + nf_net_ipv4_netfilter_sysctl_path, + l4proto->ctl_compat_table, NULL); + if (err == 0) + goto out; + nf_ct_unregister_sysctl(l4proto->ctl_table_header, + l4proto->ctl_table, + l4proto->ctl_table_users); + } +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +out: + mutex_unlock(&nf_ct_proto_sysctl_mutex); +#endif /* CONFIG_SYSCTL */ + return err; +} + +static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto) +{ +#ifdef CONFIG_SYSCTL + mutex_lock(&nf_ct_proto_sysctl_mutex); + if (l4proto->ctl_table_header != NULL && + *l4proto->ctl_table_header != NULL) + nf_ct_unregister_sysctl(l4proto->ctl_table_header, + l4proto->ctl_table, + l4proto->ctl_table_users); +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (l4proto->ctl_compat_table_header != NULL) + nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header, + l4proto->ctl_compat_table, NULL); +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ + mutex_unlock(&nf_ct_proto_sysctl_mutex); +#endif /* CONFIG_SYSCTL */ +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them. --RR */ +int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) +{ + int ret = 0; + + if (l4proto->l3proto >= PF_MAX) { + ret = -EBUSY; + goto out; + } + + if (l4proto == &nf_conntrack_l4proto_generic) + return nf_ct_l4proto_register_sysctl(l4proto); + +retry: + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_protos[l4proto->l3proto]) { + if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] + != &nf_conntrack_l4proto_generic) { + ret = -EBUSY; + goto out_unlock; + } + } else { + /* l3proto may be loaded latter. */ + struct nf_conntrack_l4proto **proto_array; + int i; + + write_unlock_bh(&nf_conntrack_lock); + + proto_array = (struct nf_conntrack_l4proto **) + kmalloc(MAX_NF_CT_PROTO * + sizeof(struct nf_conntrack_l4proto *), + GFP_KERNEL); + if (proto_array == NULL) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < MAX_NF_CT_PROTO; i++) + proto_array[i] = &nf_conntrack_l4proto_generic; + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_protos[l4proto->l3proto]) { + /* bad timing, but no problem */ + write_unlock_bh(&nf_conntrack_lock); + kfree(proto_array); + } else { + nf_ct_protos[l4proto->l3proto] = proto_array; + write_unlock_bh(&nf_conntrack_lock); + } + + /* + * Just once because array is never freed until unloading + * nf_conntrack.ko + */ + goto retry; + } + + nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto; + write_unlock_bh(&nf_conntrack_lock); + + ret = nf_ct_l4proto_register_sysctl(l4proto); + if (ret < 0) + nf_conntrack_l4proto_unregister(l4proto); + return ret; + +out_unlock: + write_unlock_bh(&nf_conntrack_lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); + +int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +{ + int ret = 0; + + if (l4proto->l3proto >= PF_MAX) { + ret = -EBUSY; + goto out; + } + + if (l4proto == &nf_conntrack_l4proto_generic) { + nf_ct_l4proto_unregister_sysctl(l4proto); + goto out; + } + + write_lock_bh(&nf_conntrack_lock); + if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] + != l4proto) { + write_unlock_bh(&nf_conntrack_lock); + ret = -EBUSY; + goto out; + } + nf_ct_protos[l4proto->l3proto][l4proto->l4proto] + = &nf_conntrack_l4proto_generic; + write_unlock_bh(&nf_conntrack_lock); + + nf_ct_l4proto_unregister_sysctl(l4proto); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_cleanup(kill_l4proto, l4proto); + +out: + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 26408bb0955..69902531c23 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -15,9 +15,9 @@ #include <linux/sched.h> #include <linux/timer.h> #include <linux/netfilter.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> -unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -71,10 +71,42 @@ static int new(struct nf_conn *conntrack, const struct sk_buff *skb, return 1; } -struct nf_conntrack_protocol nf_conntrack_generic_protocol = +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *generic_sysctl_header; +static struct ctl_table generic_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, + .procname = "nf_conntrack_generic_timeout", + .data = &nf_ct_generic_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table generic_compat_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, + .procname = "ip_conntrack_generic_timeout", + .data = &nf_ct_generic_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l3proto = PF_UNSPEC, - .proto = 0, + .l4proto = 0, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, @@ -82,4 +114,11 @@ struct nf_conntrack_protocol nf_conntrack_generic_protocol = .print_conntrack = generic_print_conntrack, .packet = packet, .new = new, +#ifdef CONFIG_SYSCTL + .ctl_table_header = &generic_sysctl_header, + .ctl_table = generic_sysctl_table, +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + .ctl_compat_table = generic_compat_sysctl_table, +#endif +#endif }; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c new file mode 100644 index 00000000000..ac193ce7024 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -0,0 +1,305 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/seq_file.h> +#include <linux/in.h> +#include <linux/skbuff.h> + +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_pptp.h> + +#define GRE_TIMEOUT (30 * HZ) +#define GRE_STREAM_TIMEOUT (180 * HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +static DEFINE_RWLOCK(nf_ct_gre_lock); +static LIST_HEAD(gre_keymap_list); + +void nf_ct_gre_keymap_flush(void) +{ + struct list_head *pos, *n; + + write_lock_bh(&nf_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + list_del(pos); + kfree(pos); + } + write_unlock_bh(&nf_ct_gre_lock); +} +EXPORT_SYMBOL(nf_ct_gre_keymap_flush); + +static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, + const struct nf_conntrack_tuple *t) +{ + return km->tuple.src.l3num == t->src.l3num && + !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) && + !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) && + km->tuple.dst.protonum == t->dst.protonum && + km->tuple.dst.u.all == t->dst.u.all; +} + +/* look up the source key for a given tuple */ +static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) +{ + struct nf_ct_gre_keymap *km; + __be16 key = 0; + + read_lock_bh(&nf_ct_gre_lock); + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t)) { + key = km->tuple.src.u.gre.key; + break; + } + } + read_unlock_bh(&nf_ct_gre_lock); + + DEBUGP("lookup src key 0x%x for ", key); + NF_CT_DUMP_TUPLE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, + struct nf_conntrack_tuple *t) +{ + struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_gre_keymap **kmp, *km; + + BUG_ON(strcmp(help->helper->name, "pptp")); + kmp = &help->help.ct_pptp_info.keymap[dir]; + if (*kmp) { + /* check whether it's a retransmission */ + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *kmp) + return 0; + } + DEBUGP("trying to override keymap_%s for ct %p\n", + dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + memcpy(&km->tuple, t, sizeof(*t)); + *kmp = km; + + DEBUGP("adding new entry %p: ", km); + NF_CT_DUMP_TUPLE(&km->tuple); + + write_lock_bh(&nf_ct_gre_lock); + list_add_tail(&km->list, &gre_keymap_list); + write_unlock_bh(&nf_ct_gre_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); + +/* destroy the keymap entries associated with specified master ct */ +void nf_ct_gre_keymap_destroy(struct nf_conn *ct) +{ + struct nf_conn_help *help = nfct_help(ct); + enum ip_conntrack_dir dir; + + DEBUGP("entering for ct %p\n", ct); + BUG_ON(strcmp(help->helper->name, "pptp")); + + write_lock_bh(&nf_ct_gre_lock); + for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { + if (help->help.ct_pptp_info.keymap[dir]) { + DEBUGP("removing %p from list\n", + help->help.ct_pptp_info.keymap[dir]); + list_del(&help->help.ct_pptp_info.keymap[dir]->list); + kfree(help->help.ct_pptp_info.keymap[dir]); + help->help.ct_pptp_info.keymap[dir] = NULL; + } + } + write_unlock_bh(&nf_ct_gre_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + __be16 srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "nf_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct nf_conn *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + nf_ct_refresh_acct(ct, ctinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + nf_conntrack_event_cache(IPCT_STATUS, skb); + } else + nf_ct_refresh_acct(ct, ctinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff) +{ + DEBUGP(": "); + NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct nf_conn *ct) +{ + struct nf_conn *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + nf_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { + .l3proto = AF_INET, + .l4proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif +}; + +static int __init nf_ct_proto_gre_init(void) +{ + return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); +} + +static void nf_ct_proto_gre_fini(void) +{ + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_ct_gre_keymap_flush(); +} + +module_init(nf_ct_proto_gre_init); +module_exit(nf_ct_proto_gre_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index af568777372..76e26366822 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -32,7 +32,8 @@ #include <linux/interrupt.h> #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> #if 0 #define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) @@ -216,7 +217,7 @@ static int sctp_print_conntrack(struct seq_file *s, for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \ offset < skb->len && \ (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ - offset += (htons(sch->length) + 3) & ~3, count++) + offset += (ntohs(sch->length) + 3) & ~3, count++) /* Some validity checks to make sure the chunks are fine */ static int do_basic_checks(struct nf_conn *conntrack, @@ -508,36 +509,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, return 1; } -struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { - .l3proto = PF_INET, - .proto = IPPROTO_SCTP, - .name = "sctp", - .pkt_to_tuple = sctp_pkt_to_tuple, - .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, - .print_conntrack = sctp_print_conntrack, - .packet = sctp_packet, - .new = sctp_new, - .destroy = NULL, - .me = THIS_MODULE -}; - -struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { - .l3proto = PF_INET6, - .proto = IPPROTO_SCTP, - .name = "sctp", - .pkt_to_tuple = sctp_pkt_to_tuple, - .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, - .print_conntrack = sctp_print_conntrack, - .packet = sctp_packet, - .new = sctp_new, - .destroy = NULL, - .me = THIS_MODULE -}; - #ifdef CONFIG_SYSCTL -static ctl_table nf_ct_sysctl_table[] = { +static unsigned int sctp_sysctl_table_users; +static struct ctl_table_header *sctp_sysctl_header; +static struct ctl_table sctp_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, .procname = "nf_conntrack_sctp_timeout_closed", @@ -594,63 +569,134 @@ static ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { + .ctl_name = 0 + } }; -static ctl_table nf_ct_netfilter_table[] = { +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table sctp_compat_sysctl_table[] = { { - .ctl_name = NET_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = nf_ct_sysctl_table, + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, + .procname = "ip_conntrack_sctp_timeout_closed", + .data = &nf_ct_sctp_timeout_closed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, + .procname = "ip_conntrack_sctp_timeout_cookie_wait", + .data = &nf_ct_sctp_timeout_cookie_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, + .procname = "ip_conntrack_sctp_timeout_cookie_echoed", + .data = &nf_ct_sctp_timeout_cookie_echoed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, + .procname = "ip_conntrack_sctp_timeout_established", + .data = &nf_ct_sctp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, + .procname = "ip_conntrack_sctp_timeout_shutdown_sent", + .data = &nf_ct_sctp_timeout_shutdown_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, + .procname = "ip_conntrack_sctp_timeout_shutdown_recd", + .data = &nf_ct_sctp_timeout_shutdown_recd, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_net_table[] = { { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = nf_ct_netfilter_table, + .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, + .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", + .data = &nf_ct_sctp_timeout_shutdown_ack_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif + +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { + .l3proto = PF_INET, + .l4proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .me = THIS_MODULE, +#ifdef CONFIG_SYSCTL + .ctl_table_users = &sctp_sysctl_table_users, + .ctl_table_header = &sctp_sysctl_header, + .ctl_table = sctp_sysctl_table, +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + .ctl_compat_table = sctp_compat_sysctl_table, +#endif +#endif }; -static struct ctl_table_header *nf_ct_sysctl_header; +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { + .l3proto = PF_INET6, + .l4proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .me = THIS_MODULE, +#ifdef CONFIG_SYSCTL + .ctl_table_users = &sctp_sysctl_table_users, + .ctl_table_header = &sctp_sysctl_header, + .ctl_table = sctp_sysctl_table, #endif +}; int __init nf_conntrack_proto_sctp_init(void) { int ret; - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret) { - printk("nf_conntrack_proto_sctp4: protocol register failed\n"); + printk("nf_conntrack_l4proto_sctp4: protocol register failed\n"); goto out; } - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6); + ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6); if (ret) { - printk("nf_conntrack_proto_sctp6: protocol register failed\n"); + printk("nf_conntrack_l4proto_sctp6: protocol register failed\n"); goto cleanup_sctp4; } -#ifdef CONFIG_SYSCTL - nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); - if (nf_ct_sysctl_header == NULL) { - printk("nf_conntrack_proto_sctp: can't register to sysctl.\n"); - goto cleanup; - } -#endif - return ret; -#ifdef CONFIG_SYSCTL - cleanup: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); -#endif cleanup_sctp4: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out: DEBUGP("SCTP conntrack module loading %s\n", ret ? "failed": "succeeded"); @@ -659,11 +705,8 @@ int __init nf_conntrack_proto_sctp_init(void) void __exit nf_conntrack_proto_sctp_fini(void) { - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(nf_ct_sysctl_header); -#endif + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); DEBUGP("SCTP conntrack module unloaded\n"); } @@ -673,3 +716,4 @@ module_exit(nf_conntrack_proto_sctp_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kiran Kumar Immidi"); MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); +MODULE_ALIAS("ip_conntrack_proto_sctp"); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 238bbb5b72e..626b0011dd8 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -42,7 +42,8 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> #if 0 #define DEBUGP printk @@ -92,22 +93,22 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; -unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; -unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; -unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; -unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; +static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +static unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; +static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +static unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; -static unsigned int * tcp_timeouts[] -= { NULL, /* TCP_CONNTRACK_NONE */ +static unsigned int * tcp_timeouts[] = { + NULL, /* TCP_CONNTRACK_NONE */ &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ @@ -473,8 +474,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)ptr == - __constant_ntohl((TCPOPT_NOP << 24) + && *(__be32 *)ptr == + __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) @@ -505,9 +506,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { - memcpy(&tmp, (__u32 *)(ptr + i) + 1, - sizeof(__u32)); - tmp = ntohl(tmp); + tmp = ntohl(*((__be32 *)(ptr+i)+1)); if (after(tmp, *sack)) *sack = tmp; @@ -731,7 +730,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, return res; } -#ifdef CONFIG_IP_NF_NAT_NEEDED +#ifdef CONFIG_NF_NAT_NEEDED /* Update sender->td_end after NAT successfully mangled the packet */ /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(struct sk_buff *skb, @@ -763,7 +762,7 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); } - +EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); #endif #define TH_FIN 0x01 @@ -1167,11 +1166,221 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) return 0; } #endif - -struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = + +#ifdef CONFIG_SYSCTL +static unsigned int tcp_sysctl_table_users; +static struct ctl_table_header *tcp_sysctl_header; +static struct ctl_table tcp_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, + .procname = "nf_conntrack_tcp_timeout_syn_sent", + .data = &nf_ct_tcp_timeout_syn_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, + .procname = "nf_conntrack_tcp_timeout_syn_recv", + .data = &nf_ct_tcp_timeout_syn_recv, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, + .procname = "nf_conntrack_tcp_timeout_established", + .data = &nf_ct_tcp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, + .procname = "nf_conntrack_tcp_timeout_fin_wait", + .data = &nf_ct_tcp_timeout_fin_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, + .procname = "nf_conntrack_tcp_timeout_close_wait", + .data = &nf_ct_tcp_timeout_close_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, + .procname = "nf_conntrack_tcp_timeout_last_ack", + .data = &nf_ct_tcp_timeout_last_ack, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, + .procname = "nf_conntrack_tcp_timeout_time_wait", + .data = &nf_ct_tcp_timeout_time_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, + .procname = "nf_conntrack_tcp_timeout_close", + .data = &nf_ct_tcp_timeout_close, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, + .procname = "nf_conntrack_tcp_timeout_max_retrans", + .data = &nf_ct_tcp_timeout_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, + .procname = "nf_conntrack_tcp_loose", + .data = &nf_ct_tcp_loose, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, + .procname = "nf_conntrack_tcp_be_liberal", + .data = &nf_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, + .procname = "nf_conntrack_tcp_max_retrans", + .data = &nf_ct_tcp_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = 0 + } +}; + +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table tcp_compat_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, + .procname = "ip_conntrack_tcp_timeout_syn_sent", + .data = &nf_ct_tcp_timeout_syn_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, + .procname = "ip_conntrack_tcp_timeout_syn_recv", + .data = &nf_ct_tcp_timeout_syn_recv, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, + .procname = "ip_conntrack_tcp_timeout_established", + .data = &nf_ct_tcp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, + .procname = "ip_conntrack_tcp_timeout_fin_wait", + .data = &nf_ct_tcp_timeout_fin_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, + .procname = "ip_conntrack_tcp_timeout_close_wait", + .data = &nf_ct_tcp_timeout_close_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, + .procname = "ip_conntrack_tcp_timeout_last_ack", + .data = &nf_ct_tcp_timeout_last_ack, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, + .procname = "ip_conntrack_tcp_timeout_time_wait", + .data = &nf_ct_tcp_timeout_time_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, + .procname = "ip_conntrack_tcp_timeout_close", + .data = &nf_ct_tcp_timeout_close, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, + .procname = "ip_conntrack_tcp_timeout_max_retrans", + .data = &nf_ct_tcp_timeout_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, + .procname = "ip_conntrack_tcp_loose", + .data = &nf_ct_tcp_loose, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, + .procname = "ip_conntrack_tcp_be_liberal", + .data = &nf_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, + .procname = "ip_conntrack_tcp_max_retrans", + .data = &nf_ct_tcp_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = { .l3proto = PF_INET, - .proto = IPPROTO_TCP, + .l4proto = IPPROTO_TCP, .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, @@ -1187,12 +1396,21 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &tcp_sysctl_table_users, + .ctl_table_header = &tcp_sysctl_header, + .ctl_table = tcp_sysctl_table, +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + .ctl_compat_table = tcp_compat_sysctl_table, +#endif +#endif }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = { .l3proto = PF_INET6, - .proto = IPPROTO_TCP, + .l4proto = IPPROTO_TCP, .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, @@ -1208,7 +1426,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &tcp_sysctl_table_users, + .ctl_table_header = &tcp_sysctl_header, + .ctl_table = tcp_sysctl_table, +#endif }; - -EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); -EXPORT_SYMBOL(nf_conntrack_protocol_tcp6); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index d28981cf9af..e49cd25998c 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -22,13 +22,15 @@ #include <linux/ipv6.h> #include <net/ip6_checksum.h> #include <net/checksum.h> + #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> -unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; -unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; +static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -146,10 +148,59 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } -struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = +#ifdef CONFIG_SYSCTL +static unsigned int udp_sysctl_table_users; +static struct ctl_table_header *udp_sysctl_header; +static struct ctl_table udp_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, + .procname = "nf_conntrack_udp_timeout", + .data = &nf_ct_udp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, + .procname = "nf_conntrack_udp_timeout_stream", + .data = &nf_ct_udp_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table udp_compat_sysctl_table[] = { + { + .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, + .procname = "ip_conntrack_udp_timeout", + .data = &nf_ct_udp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, + .procname = "ip_conntrack_udp_timeout_stream", + .data = &nf_ct_udp_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = { .l3proto = PF_INET, - .proto = IPPROTO_UDP, + .l4proto = IPPROTO_UDP, .name = "udp", .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, @@ -163,12 +214,21 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udp_sysctl_table_users, + .ctl_table_header = &udp_sysctl_header, + .ctl_table = udp_sysctl_table, +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + .ctl_compat_table = udp_compat_sysctl_table, +#endif +#endif }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); -struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = { .l3proto = PF_INET6, - .proto = IPPROTO_UDP, + .l4proto = IPPROTO_UDP, .name = "udp", .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, @@ -182,7 +242,10 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, #endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udp_sysctl_table_users, + .ctl_table_header = &udp_sysctl_header, + .ctl_table = udp_sysctl_table, +#endif }; - -EXPORT_SYMBOL(nf_conntrack_protocol_udp4); -EXPORT_SYMBOL(nf_conntrack_protocol_udp6); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c new file mode 100644 index 00000000000..eb2a2411f97 --- /dev/null +++ b/net/netfilter/nf_conntrack_sip.c @@ -0,0 +1,531 @@ +/* SIP extension for IP connection tracking. + * + * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> + * based on RR's ip_conntrack_ftp.c and other modules. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/skbuff.h> +#include <linux/inet.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_sip.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); +MODULE_DESCRIPTION("SIP connection tracking helper"); +MODULE_ALIAS("ip_conntrack_sip"); + +#define MAX_PORTS 8 +static unsigned short ports[MAX_PORTS]; +static int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); +MODULE_PARM_DESC(ports, "port numbers of SIP servers"); + +static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; +module_param(sip_timeout, uint, 0600); +MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); + +unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conn *ct, + const char **dptr) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sip_hook); + +unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp, + const char *dptr) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); + +static int digits_len(struct nf_conn *, const char *, const char *, int *); +static int epaddr_len(struct nf_conn *, const char *, const char *, int *); +static int skp_digits_len(struct nf_conn *, const char *, const char *, int *); +static int skp_epaddr_len(struct nf_conn *, const char *, const char *, int *); + +struct sip_header_nfo { + const char *lname; + const char *sname; + const char *ln_str; + size_t lnlen; + size_t snlen; + size_t ln_strlen; + int case_sensitive; + int (*match_len)(struct nf_conn *, const char *, + const char *, int *); +}; + +static const struct sip_header_nfo ct_sip_hdrs[] = { + [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */ + .lname = "sip:", + .lnlen = sizeof("sip:") - 1, + .ln_str = ":", + .ln_strlen = sizeof(":") - 1, + .match_len = epaddr_len, + }, + [POS_REQ_URI] = { /* SIP request URI */ + .lname = "sip:", + .lnlen = sizeof("sip:") - 1, + .ln_str = "@", + .ln_strlen = sizeof("@") - 1, + .match_len = epaddr_len, + }, + [POS_FROM] = { /* SIP From header */ + .lname = "From:", + .lnlen = sizeof("From:") - 1, + .sname = "\r\nf:", + .snlen = sizeof("\r\nf:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len, + }, + [POS_TO] = { /* SIP To header */ + .lname = "To:", + .lnlen = sizeof("To:") - 1, + .sname = "\r\nt:", + .snlen = sizeof("\r\nt:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len + }, + [POS_VIA] = { /* SIP Via header */ + .lname = "Via:", + .lnlen = sizeof("Via:") - 1, + .sname = "\r\nv:", + .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */ + .ln_str = "UDP ", + .ln_strlen = sizeof("UDP ") - 1, + .match_len = epaddr_len, + }, + [POS_CONTACT] = { /* SIP Contact header */ + .lname = "Contact:", + .lnlen = sizeof("Contact:") - 1, + .sname = "\r\nm:", + .snlen = sizeof("\r\nm:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len + }, + [POS_CONTENT] = { /* SIP Content length header */ + .lname = "Content-Length:", + .lnlen = sizeof("Content-Length:") - 1, + .sname = "\r\nl:", + .snlen = sizeof("\r\nl:") - 1, + .ln_str = ":", + .ln_strlen = sizeof(":") - 1, + .match_len = skp_digits_len + }, + [POS_MEDIA] = { /* SDP media info */ + .case_sensitive = 1, + .lname = "\nm=", + .lnlen = sizeof("\nm=") - 1, + .sname = "\rm=", + .snlen = sizeof("\rm=") - 1, + .ln_str = "audio ", + .ln_strlen = sizeof("audio ") - 1, + .match_len = digits_len + }, + [POS_OWNER_IP4] = { /* SDP owner address*/ + .case_sensitive = 1, + .lname = "\no=", + .lnlen = sizeof("\no=") - 1, + .sname = "\ro=", + .snlen = sizeof("\ro=") - 1, + .ln_str = "IN IP4 ", + .ln_strlen = sizeof("IN IP4 ") - 1, + .match_len = epaddr_len + }, + [POS_CONNECTION_IP4] = {/* SDP connection info */ + .case_sensitive = 1, + .lname = "\nc=", + .lnlen = sizeof("\nc=") - 1, + .sname = "\rc=", + .snlen = sizeof("\rc=") - 1, + .ln_str = "IN IP4 ", + .ln_strlen = sizeof("IN IP4 ") - 1, + .match_len = epaddr_len + }, + [POS_OWNER_IP6] = { /* SDP owner address*/ + .case_sensitive = 1, + .lname = "\no=", + .lnlen = sizeof("\no=") - 1, + .sname = "\ro=", + .snlen = sizeof("\ro=") - 1, + .ln_str = "IN IP6 ", + .ln_strlen = sizeof("IN IP6 ") - 1, + .match_len = epaddr_len + }, + [POS_CONNECTION_IP6] = {/* SDP connection info */ + .case_sensitive = 1, + .lname = "\nc=", + .lnlen = sizeof("\nc=") - 1, + .sname = "\rc=", + .snlen = sizeof("\rc=") - 1, + .ln_str = "IN IP6 ", + .ln_strlen = sizeof("IN IP6 ") - 1, + .match_len = epaddr_len + }, + [POS_SDP_HEADER] = { /* SDP version header */ + .case_sensitive = 1, + .lname = "\nv=", + .lnlen = sizeof("\nv=") - 1, + .sname = "\rv=", + .snlen = sizeof("\rv=") - 1, + .ln_str = "=", + .ln_strlen = sizeof("=") - 1, + .match_len = digits_len + } +}; + +/* get line lenght until first CR or LF seen. */ +int ct_sip_lnlen(const char *line, const char *limit) +{ + const char *k = line; + + while ((line <= limit) && (*line == '\r' || *line == '\n')) + line++; + + while (line <= limit) { + if (*line == '\r' || *line == '\n') + break; + line++; + } + return line - k; +} +EXPORT_SYMBOL_GPL(ct_sip_lnlen); + +/* Linear string search, case sensitive. */ +const char *ct_sip_search(const char *needle, const char *haystack, + size_t needle_len, size_t haystack_len, + int case_sensitive) +{ + const char *limit = haystack + (haystack_len - needle_len); + + while (haystack <= limit) { + if (case_sensitive) { + if (strncmp(haystack, needle, needle_len) == 0) + return haystack; + } else { + if (strnicmp(haystack, needle, needle_len) == 0) + return haystack; + } + haystack++; + } + return NULL; +} +EXPORT_SYMBOL_GPL(ct_sip_search); + +static int digits_len(struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + int len = 0; + while (dptr <= limit && isdigit(*dptr)) { + dptr++; + len++; + } + return len; +} + +/* get digits lenght, skiping blank spaces. */ +static int skp_digits_len(struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + for (; dptr <= limit && *dptr == ' '; dptr++) + (*shift)++; + + return digits_len(ct, dptr, limit, shift); +} + +static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp, + union nf_conntrack_address *addr, const char *limit) +{ + const char *end; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int ret = 0; + + switch (family) { + case AF_INET: + ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + break; + case AF_INET6: + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + break; + default: + BUG(); + } + + if (ret == 0 || end == cp) + return 0; + if (endp) + *endp = end; + return 1; +} + +/* skip ip address. returns its length. */ +static int epaddr_len(struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + union nf_conntrack_address addr; + const char *aux = dptr; + + if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { + DEBUGP("ip: %s parse failed.!\n", dptr); + return 0; + } + + /* Port number */ + if (*dptr == ':') { + dptr++; + dptr += digits_len(ct, dptr, limit, shift); + } + return dptr - aux; +} + +/* get address length, skiping user info. */ +static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + int s = *shift; + + for (; dptr <= limit && *dptr != '@'; dptr++) + (*shift)++; + + if (*dptr == '@') { + dptr++; + (*shift)++; + } else + *shift = s; + + return epaddr_len(ct, dptr, limit, shift); +} + +/* Returns 0 if not found, -1 error parsing. */ +int ct_sip_get_info(struct nf_conn *ct, + const char *dptr, size_t dlen, + unsigned int *matchoff, + unsigned int *matchlen, + enum sip_header_pos pos) +{ + const struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos]; + const char *limit, *aux, *k = dptr; + int shift = 0; + + limit = dptr + (dlen - hnfo->lnlen); + + while (dptr <= limit) { + if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) && + (strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { + dptr++; + continue; + } + aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen, + ct_sip_lnlen(dptr, limit), + hnfo->case_sensitive); + if (!aux) { + DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str, + hnfo->lname); + return -1; + } + aux += hnfo->ln_strlen; + + *matchlen = hnfo->match_len(ct, aux, limit, &shift); + if (!*matchlen) + return -1; + + *matchoff = (aux - k) + shift; + + DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname, + *matchlen); + return 1; + } + DEBUGP("%s header not found.\n", hnfo->lname); + return 0; +} +EXPORT_SYMBOL_GPL(ct_sip_get_info); + +static int set_expected_rtp(struct sk_buff **pskb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + union nf_conntrack_address *addr, + __be16 port, + const char *dptr) +{ + struct nf_conntrack_expect *exp; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + int family = ct->tuplehash[!dir].tuple.src.l3num; + int ret; + typeof(nf_nat_sdp_hook) nf_nat_sdp; + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) + return NF_DROP; + nf_conntrack_expect_init(exp, family, + &ct->tuplehash[!dir].tuple.src.u3, addr, + IPPROTO_UDP, NULL, &port); + + nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); + if (nf_nat_sdp && ct->status & IPS_NAT_MASK) + ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); + else { + if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + else + ret = NF_ACCEPT; + } + nf_conntrack_expect_put(exp); + + return ret; +} + +static int sip_help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + union nf_conntrack_address addr; + unsigned int dataoff, datalen; + const char *dptr; + int ret = NF_ACCEPT; + int matchoff, matchlen; + u_int16_t port; + enum sip_header_pos pos; + typeof(nf_nat_sip_hook) nf_nat_sip; + + /* No Data ? */ + dataoff = protoff + sizeof(struct udphdr); + if (dataoff >= (*pskb)->len) + return NF_ACCEPT; + + nf_ct_refresh(ct, *pskb, sip_timeout * HZ); + + if (!skb_is_nonlinear(*pskb)) + dptr = (*pskb)->data + dataoff; + else { + DEBUGP("Copy of skbuff not supported yet.\n"); + goto out; + } + + nf_nat_sip = rcu_dereference(nf_nat_sip_hook); + if (nf_nat_sip && ct->status & IPS_NAT_MASK) { + if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) { + ret = NF_DROP; + goto out; + } + } + + datalen = (*pskb)->len - dataoff; + if (datalen < sizeof("SIP/2.0 200") - 1) + goto out; + + /* RTP info only in some SDP pkts */ + if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 && + memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) { + goto out; + } + /* Get address and port from SDP packet. */ + pos = family == AF_INET ? POS_CONNECTION_IP4 : POS_CONNECTION_IP6; + if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen, pos) > 0) { + + /* We'll drop only if there are parse problems. */ + if (!parse_addr(ct, dptr + matchoff, NULL, &addr, + dptr + datalen)) { + ret = NF_DROP; + goto out; + } + if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen, + POS_MEDIA) > 0) { + + port = simple_strtoul(dptr + matchoff, NULL, 10); + if (port < 1024) { + ret = NF_DROP; + goto out; + } + ret = set_expected_rtp(pskb, ct, ctinfo, &addr, + htons(port), dptr); + } + } +out: + return ret; +} + +static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; +static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; + +static void nf_conntrack_sip_fini(void) +{ + int i, j; + + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) { + if (sip[i][j].me == NULL) + continue; + nf_conntrack_helper_unregister(&sip[i][j]); + } + } +} + +static int __init nf_conntrack_sip_init(void) +{ + int i, j, ret; + char *tmpname; + + if (ports_c == 0) + ports[ports_c++] = SIP_PORT; + + for (i = 0; i < ports_c; i++) { + memset(&sip[i], 0, sizeof(sip[i])); + + sip[i][0].tuple.src.l3num = AF_INET; + sip[i][1].tuple.src.l3num = AF_INET6; + for (j = 0; j < 2; j++) { + sip[i][j].tuple.dst.protonum = IPPROTO_UDP; + sip[i][j].tuple.src.u.udp.port = htons(ports[i]); + sip[i][j].mask.src.l3num = 0xFFFF; + sip[i][j].mask.src.u.udp.port = htons(0xFFFF); + sip[i][j].mask.dst.protonum = 0xFF; + sip[i][j].max_expected = 2; + sip[i][j].timeout = 3 * 60; /* 3 minutes */ + sip[i][j].me = THIS_MODULE; + sip[i][j].help = sip_help; + + tmpname = &sip_names[i][j][0]; + if (ports[i] == SIP_PORT) + sprintf(tmpname, "sip"); + else + sprintf(tmpname, "sip-%u", i); + sip[i][j].name = tmpname; + + DEBUGP("port #%u: %u\n", i, ports[i]); + + ret = nf_conntrack_helper_register(&sip[i][j]); + if (ret) { + printk("nf_ct_sip: failed to register helper " + "for pf: %u port: %u\n", + sip[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_sip_fini(); + return ret; + } + } + } + return 0; +} + +module_init(nf_conntrack_sip_init); +module_exit(nf_conntrack_sip_fini); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5954f677381..f1cb60ff931 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -29,13 +29,11 @@ #include <linux/sysctl.h> #endif -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_protocol.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #if 0 @@ -46,33 +44,15 @@ MODULE_LICENSE("GPL"); -extern atomic_t nf_conntrack_count; -DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); - -static int kill_l3proto(struct nf_conn *i, void *data) -{ - return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == - ((struct nf_conntrack_l3proto *)data)->l3proto); -} - -static int kill_proto(struct nf_conn *i, void *data) -{ - struct nf_conntrack_protocol *proto; - proto = (struct nf_conntrack_protocol *)data; - return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == - proto->proto) && - (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == - proto->l3proto); -} - #ifdef CONFIG_PROC_FS -static int +int print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_protocol *proto) + struct nf_conntrack_l4proto *l4proto) { - return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple); + return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple); } +EXPORT_SYMBOL_GPL(print_tuple); #ifdef CONFIG_NF_CT_ACCT static unsigned int @@ -150,9 +130,8 @@ static int ct_seq_show(struct seq_file *s, void *v) const struct nf_conntrack_tuple_hash *hash = v; const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_protocol *proto; + struct nf_conntrack_l4proto *l4proto; - ASSERT_READ_LOCK(&nf_conntrack_lock); NF_CT_ASSERT(conntrack); /* we only want to print DIR_ORIGINAL */ @@ -163,16 +142,16 @@ static int ct_seq_show(struct seq_file *s, void *v) .tuple.src.l3num); NF_CT_ASSERT(l3proto); - proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + l4proto = __nf_ct_l4proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.src.l3num, conntrack->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.protonum); - NF_CT_ASSERT(proto); + NF_CT_ASSERT(l4proto); if (seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, - proto->name, + l4proto->name, conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, timer_pending(&conntrack->timeout) ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) @@ -181,11 +160,11 @@ static int ct_seq_show(struct seq_file *s, void *v) if (l3proto->print_conntrack(s, conntrack)) return -ENOSPC; - if (proto->print_conntrack(s, conntrack)) + if (l4proto->print_conntrack(s, conntrack)) return -ENOSPC; if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, proto)) + l3proto, l4proto)) return -ENOSPC; if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) @@ -196,7 +175,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, proto)) + l3proto, l4proto)) return -ENOSPC; if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) @@ -258,84 +237,6 @@ static struct file_operations ct_file_ops = { .release = seq_release_private, }; -/* expects */ -static void *exp_seq_start(struct seq_file *s, loff_t *pos) -{ - struct list_head *e = &nf_conntrack_expect_list; - loff_t i; - - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); - - if (list_empty(e)) - return NULL; - - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_conntrack_expect_list) - return NULL; - } - return e; -} - -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct list_head *e = v; - - ++*pos; - e = e->next; - - if (e == &nf_conntrack_expect_list) - return NULL; - - return e; -} - -static void exp_seq_stop(struct seq_file *s, void *v) -{ - read_unlock_bh(&nf_conntrack_lock); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_expect *expect = v; - - if (expect->timeout.function) - seq_printf(s, "%ld ", timer_pending(&expect->timeout) - ? (long)(expect->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - seq_printf(s, "l3proto = %u proto=%u ", - expect->tuple.src.l3num, - expect->tuple.dst.protonum); - print_tuple(s, &expect->tuple, - __nf_ct_l3proto_find(expect->tuple.src.l3num), - __nf_ct_proto_find(expect->tuple.src.l3num, - expect->tuple.dst.protonum)); - return seq_putc(s, '\n'); -} - -static struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &exp_seq_ops); -} - -static struct file_operations exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { int cpu; @@ -428,34 +329,9 @@ static struct file_operations ct_cpu_seq_fops = { /* Sysctl support */ int nf_conntrack_checksum __read_mostly = 1; +EXPORT_SYMBOL_GPL(nf_conntrack_checksum); #ifdef CONFIG_SYSCTL - -/* From nf_conntrack_core.c */ -extern int nf_conntrack_max; -extern unsigned int nf_conntrack_htable_size; - -/* From nf_conntrack_proto_tcp.c */ -extern unsigned int nf_ct_tcp_timeout_syn_sent; -extern unsigned int nf_ct_tcp_timeout_syn_recv; -extern unsigned int nf_ct_tcp_timeout_established; -extern unsigned int nf_ct_tcp_timeout_fin_wait; -extern unsigned int nf_ct_tcp_timeout_close_wait; -extern unsigned int nf_ct_tcp_timeout_last_ack; -extern unsigned int nf_ct_tcp_timeout_time_wait; -extern unsigned int nf_ct_tcp_timeout_close; -extern unsigned int nf_ct_tcp_timeout_max_retrans; -extern int nf_ct_tcp_loose; -extern int nf_ct_tcp_be_liberal; -extern int nf_ct_tcp_max_retrans; - -/* From nf_conntrack_proto_udp.c */ -extern unsigned int nf_ct_udp_timeout; -extern unsigned int nf_ct_udp_timeout_stream; - -/* From nf_conntrack_proto_generic.c */ -extern unsigned int nf_ct_generic_timeout; - /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; @@ -496,94 +372,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, - .procname = "nf_conntrack_tcp_timeout_syn_sent", - .data = &nf_ct_tcp_timeout_syn_sent, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, - .procname = "nf_conntrack_tcp_timeout_syn_recv", - .data = &nf_ct_tcp_timeout_syn_recv, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, - .procname = "nf_conntrack_tcp_timeout_established", - .data = &nf_ct_tcp_timeout_established, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, - .procname = "nf_conntrack_tcp_timeout_fin_wait", - .data = &nf_ct_tcp_timeout_fin_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, - .procname = "nf_conntrack_tcp_timeout_close_wait", - .data = &nf_ct_tcp_timeout_close_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, - .procname = "nf_conntrack_tcp_timeout_last_ack", - .data = &nf_ct_tcp_timeout_last_ack, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, - .procname = "nf_conntrack_tcp_timeout_time_wait", - .data = &nf_ct_tcp_timeout_time_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, - .procname = "nf_conntrack_tcp_timeout_close", - .data = &nf_ct_tcp_timeout_close, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, - .procname = "nf_conntrack_udp_timeout", - .data = &nf_ct_udp_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, - .procname = "nf_conntrack_udp_timeout_stream", - .data = &nf_ct_udp_timeout_stream, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, - .procname = "nf_conntrack_generic_timeout", - .data = &nf_ct_generic_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", .data = &nf_ct_log_invalid, @@ -594,38 +382,6 @@ static ctl_table nf_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, - .procname = "nf_conntrack_tcp_timeout_max_retrans", - .data = &nf_ct_tcp_timeout_max_retrans, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, - .procname = "nf_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, - .procname = "nf_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, - .procname = "nf_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, { .ctl_name = 0 } }; @@ -659,109 +415,9 @@ static ctl_table nf_ct_net_table[] = { }, { .ctl_name = 0 } }; -EXPORT_SYMBOL(nf_ct_log_invalid); +EXPORT_SYMBOL_GPL(nf_ct_log_invalid); #endif /* CONFIG_SYSCTL */ -int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) -{ - int ret = 0; - - write_lock_bh(&nf_conntrack_lock); - if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) { - ret = -EBUSY; - goto out; - } - nf_ct_l3protos[proto->l3proto] = proto; -out: - write_unlock_bh(&nf_conntrack_lock); - - return ret; -} - -void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) -{ - write_lock_bh(&nf_conntrack_lock); - nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto; - write_unlock_bh(&nf_conntrack_lock); - - /* Somebody could be still looking at the proto in bh. */ - synchronize_net(); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l3proto, proto); -} - -/* FIXME: Allow NULL functions and sub in pointers to generic for - them. --RR */ -int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto) -{ - int ret = 0; - -retry: - write_lock_bh(&nf_conntrack_lock); - if (nf_ct_protos[proto->l3proto]) { - if (nf_ct_protos[proto->l3proto][proto->proto] - != &nf_conntrack_generic_protocol) { - ret = -EBUSY; - goto out_unlock; - } - } else { - /* l3proto may be loaded latter. */ - struct nf_conntrack_protocol **proto_array; - int i; - - write_unlock_bh(&nf_conntrack_lock); - - proto_array = (struct nf_conntrack_protocol **) - kmalloc(MAX_NF_CT_PROTO * - sizeof(struct nf_conntrack_protocol *), - GFP_KERNEL); - if (proto_array == NULL) { - ret = -ENOMEM; - goto out; - } - for (i = 0; i < MAX_NF_CT_PROTO; i++) - proto_array[i] = &nf_conntrack_generic_protocol; - - write_lock_bh(&nf_conntrack_lock); - if (nf_ct_protos[proto->l3proto]) { - /* bad timing, but no problem */ - write_unlock_bh(&nf_conntrack_lock); - kfree(proto_array); - } else { - nf_ct_protos[proto->l3proto] = proto_array; - write_unlock_bh(&nf_conntrack_lock); - } - - /* - * Just once because array is never freed until unloading - * nf_conntrack.ko - */ - goto retry; - } - - nf_ct_protos[proto->l3proto][proto->proto] = proto; - -out_unlock: - write_unlock_bh(&nf_conntrack_lock); -out: - return ret; -} - -void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto) -{ - write_lock_bh(&nf_conntrack_lock); - nf_ct_protos[proto->l3proto][proto->proto] - = &nf_conntrack_generic_protocol; - write_unlock_bh(&nf_conntrack_lock); - - /* Somebody could be still looking at the proto in bh. */ - synchronize_net(); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_proto, proto); -} - static int __init nf_conntrack_standalone_init(void) { #ifdef CONFIG_PROC_FS @@ -834,70 +490,4 @@ module_exit(nf_conntrack_standalone_fini); void need_conntrack(void) { } - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -EXPORT_SYMBOL_GPL(nf_conntrack_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); -EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); -EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); -#endif -EXPORT_SYMBOL(nf_ct_l3proto_try_module_get); -EXPORT_SYMBOL(nf_ct_l3proto_module_put); -EXPORT_SYMBOL(nf_conntrack_l3proto_register); -EXPORT_SYMBOL(nf_conntrack_l3proto_unregister); -EXPORT_SYMBOL(nf_conntrack_protocol_register); -EXPORT_SYMBOL(nf_conntrack_protocol_unregister); -EXPORT_SYMBOL(nf_ct_invert_tuplepr); -EXPORT_SYMBOL(nf_conntrack_destroyed); -EXPORT_SYMBOL(need_conntrack); -EXPORT_SYMBOL(nf_conntrack_helper_register); -EXPORT_SYMBOL(nf_conntrack_helper_unregister); -EXPORT_SYMBOL(nf_ct_iterate_cleanup); -EXPORT_SYMBOL(__nf_ct_refresh_acct); -EXPORT_SYMBOL(nf_ct_protos); -EXPORT_SYMBOL(__nf_ct_proto_find); -EXPORT_SYMBOL(nf_ct_proto_find_get); -EXPORT_SYMBOL(nf_ct_proto_put); -EXPORT_SYMBOL(nf_ct_l3proto_find_get); -EXPORT_SYMBOL(nf_ct_l3proto_put); -EXPORT_SYMBOL(nf_ct_l3protos); -EXPORT_SYMBOL_GPL(nf_conntrack_checksum); -EXPORT_SYMBOL(nf_conntrack_expect_alloc); -EXPORT_SYMBOL(nf_conntrack_expect_put); -EXPORT_SYMBOL(nf_conntrack_expect_related); -EXPORT_SYMBOL(nf_conntrack_unexpect_related); -EXPORT_SYMBOL(nf_conntrack_tuple_taken); -EXPORT_SYMBOL(nf_conntrack_htable_size); -EXPORT_SYMBOL(nf_conntrack_lock); -EXPORT_SYMBOL(nf_conntrack_hash); -EXPORT_SYMBOL(nf_conntrack_untracked); -EXPORT_SYMBOL_GPL(nf_conntrack_find_get); -#ifdef CONFIG_IP_NF_NAT_NEEDED -EXPORT_SYMBOL(nf_conntrack_tcp_update); -#endif -EXPORT_SYMBOL(__nf_conntrack_confirm); -EXPORT_SYMBOL(nf_ct_get_tuple); -EXPORT_SYMBOL(nf_ct_invert_tuple); -EXPORT_SYMBOL(nf_conntrack_in); -EXPORT_SYMBOL(__nf_conntrack_attach); -EXPORT_SYMBOL(nf_conntrack_alloc); -EXPORT_SYMBOL(nf_conntrack_free); -EXPORT_SYMBOL(nf_conntrack_flush); -EXPORT_SYMBOL(nf_ct_remove_expectations); -EXPORT_SYMBOL(nf_ct_helper_find_get); -EXPORT_SYMBOL(nf_ct_helper_put); -EXPORT_SYMBOL(__nf_conntrack_helper_find_byname); -EXPORT_SYMBOL(__nf_conntrack_find); -EXPORT_SYMBOL(nf_ct_unlink_expect); -EXPORT_SYMBOL(nf_conntrack_hash_insert); -EXPORT_SYMBOL(__nf_conntrack_expect_find); -EXPORT_SYMBOL(nf_conntrack_expect_find); -EXPORT_SYMBOL(nf_conntrack_expect_list); -#if defined(CONFIG_NF_CT_NETLINK) || \ - defined(CONFIG_NF_CT_NETLINK_MODULE) -EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr); -EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple); -#endif +EXPORT_SYMBOL_GPL(need_conntrack); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c new file mode 100644 index 00000000000..f5bffe24b0a --- /dev/null +++ b/net/netfilter/nf_conntrack_tftp.c @@ -0,0 +1,160 @@ +/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_tftp.h> + +MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); +MODULE_DESCRIPTION("TFTP connection tracking helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_conntrack_tftp"); + +#define MAX_PORTS 8 +static unsigned short ports[MAX_PORTS]; +static int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); +MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); + +#if 0 +#define DEBUGP(format, args...) printk("%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) +#else +#define DEBUGP(format, args...) +#endif + +unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); + +static int tftp_help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct tftphdr _tftph, *tfh; + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple *tuple; + unsigned int ret = NF_ACCEPT; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + typeof(nf_nat_tftp_hook) nf_nat_tftp; + + tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr), + sizeof(_tftph), &_tftph); + if (tfh == NULL) + return NF_ACCEPT; + + switch (ntohs(tfh->opcode)) { + case TFTP_OPCODE_READ: + case TFTP_OPCODE_WRITE: + /* RRQ and WRQ works the same way */ + DEBUGP(""); + NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) + return NF_DROP; + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + nf_conntrack_expect_init(exp, family, + &tuple->src.u3, &tuple->dst.u3, + IPPROTO_UDP, + NULL, &tuple->dst.u.udp.port); + + DEBUGP("expect: "); + NF_CT_DUMP_TUPLE(&exp->tuple); + NF_CT_DUMP_TUPLE(&exp->mask); + + nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); + if (nf_nat_tftp && ct->status & IPS_NAT_MASK) + ret = nf_nat_tftp(pskb, ctinfo, exp); + else if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + nf_conntrack_expect_put(exp); + break; + case TFTP_OPCODE_DATA: + case TFTP_OPCODE_ACK: + DEBUGP("Data/ACK opcode\n"); + break; + case TFTP_OPCODE_ERROR: + DEBUGP("Error opcode\n"); + break; + default: + DEBUGP("Unknown opcode\n"); + } + return ret; +} + +static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; +static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly; + +static void nf_conntrack_tftp_fini(void) +{ + int i, j; + + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) + nf_conntrack_helper_unregister(&tftp[i][j]); + } +} + +static int __init nf_conntrack_tftp_init(void) +{ + int i, j, ret; + char *tmpname; + + if (ports_c == 0) + ports[ports_c++] = TFTP_PORT; + + for (i = 0; i < ports_c; i++) { + memset(&tftp[i], 0, sizeof(tftp[i])); + + tftp[i][0].tuple.src.l3num = AF_INET; + tftp[i][1].tuple.src.l3num = AF_INET6; + for (j = 0; j < 2; j++) { + tftp[i][j].tuple.dst.protonum = IPPROTO_UDP; + tftp[i][j].tuple.src.u.udp.port = htons(ports[i]); + tftp[i][j].mask.src.l3num = 0xFFFF; + tftp[i][j].mask.dst.protonum = 0xFF; + tftp[i][j].mask.src.u.udp.port = htons(0xFFFF); + tftp[i][j].max_expected = 1; + tftp[i][j].timeout = 5 * 60; /* 5 minutes */ + tftp[i][j].me = THIS_MODULE; + tftp[i][j].help = tftp_help; + + tmpname = &tftp_names[i][j][0]; + if (ports[i] == TFTP_PORT) + sprintf(tmpname, "tftp"); + else + sprintf(tmpname, "tftp-%u", i); + tftp[i][j].name = tmpname; + + ret = nf_conntrack_helper_register(&tftp[i][j]); + if (ret) { + printk("nf_ct_tftp: failed to register helper " + "for pf: %u port: %u\n", + tftp[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_tftp_fini(); + return ret; + } + } + } + return 0; +} + +module_init(nf_conntrack_tftp_init); +module_exit(nf_conntrack_tftp_fini); diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c new file mode 100644 index 00000000000..06ddddb2911 --- /dev/null +++ b/net/netfilter/nf_sysctl.c @@ -0,0 +1,134 @@ +/* nf_sysctl.c netfilter sysctl registration/unregistation + * + * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + */ +#include <linux/module.h> +#include <linux/sysctl.h> +#include <linux/string.h> +#include <linux/slab.h> + +static void +path_free(struct ctl_table *path, struct ctl_table *table) +{ + struct ctl_table *t, *next; + + for (t = path; t != NULL && t != table; t = next) { + next = t->child; + kfree(t); + } +} + +static struct ctl_table * +path_dup(struct ctl_table *path, struct ctl_table *table) +{ + struct ctl_table *t, *last = NULL, *tmp; + + for (t = path; t != NULL; t = t->child) { + /* twice the size since path elements are terminated by an + * empty element */ + tmp = kmemdup(t, 2 * sizeof(*t), GFP_KERNEL); + if (tmp == NULL) { + if (last != NULL) + path_free(path, table); + return NULL; + } + + if (last != NULL) + last->child = tmp; + else + path = tmp; + last = tmp; + } + + if (last != NULL) + last->child = table; + else + path = table; + + return path; +} + +struct ctl_table_header * +nf_register_sysctl_table(struct ctl_table *path, struct ctl_table *table) +{ + struct ctl_table_header *header; + + path = path_dup(path, table); + if (path == NULL) + return NULL; + header = register_sysctl_table(path, 0); + if (header == NULL) + path_free(path, table); + return header; +} +EXPORT_SYMBOL_GPL(nf_register_sysctl_table); + +void +nf_unregister_sysctl_table(struct ctl_table_header *header, + struct ctl_table *table) +{ + struct ctl_table *path = header->ctl_table; + + unregister_sysctl_table(header); + path_free(path, table); +} +EXPORT_SYMBOL_GPL(nf_unregister_sysctl_table); + +/* net/netfilter */ +static struct ctl_table nf_net_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + }, + { + .ctl_name = 0 + } +}; +struct ctl_table nf_net_netfilter_sysctl_path[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_net_netfilter_table, + }, + { + .ctl_name = 0 + } +}; +EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path); + +/* net/ipv4/netfilter */ +static struct ctl_table nf_net_ipv4_netfilter_table[] = { + { + .ctl_name = NET_IPV4_NETFILTER, + .procname = "netfilter", + .mode = 0555, + }, + { + .ctl_name = 0 + } +}; +static struct ctl_table nf_net_ipv4_table[] = { + { + .ctl_name = NET_IPV4, + .procname = "ipv4", + .mode = 0555, + .child = nf_net_ipv4_netfilter_table, + }, + { + .ctl_name = 0 + } +}; +struct ctl_table nf_net_ipv4_netfilter_sysctl_path[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_net_ipv4_table, + }, + { + .ctl_name = 0 + } +}; +EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b59d3b2bde2..d1505dd25c6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -408,13 +408,13 @@ __build_packet_message(struct nfulnl_instance *inst, const struct net_device *indev, const struct net_device *outdev, const struct nf_loginfo *li, - const char *prefix) + const char *prefix, unsigned int plen) { unsigned char *old_tail; struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - u_int32_t tmp_uint; + __be32 tmp_uint; UDEBUG("entered\n"); @@ -427,17 +427,13 @@ __build_packet_message(struct nfulnl_instance *inst, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(inst->group_num); - pmsg.hw_protocol = htons(skb->protocol); + pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); - if (prefix) { - int slen = strlen(prefix); - if (slen > NFULNL_PREFIXLEN) - slen = NFULNL_PREFIXLEN; - NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix); - } + if (prefix) + NFA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); if (indev) { tmp_uint = htonl(indev->ifindex); @@ -501,18 +497,16 @@ __build_packet_message(struct nfulnl_instance *inst, #endif } - if (skb->nfmark) { - tmp_uint = htonl(skb->nfmark); + if (skb->mark) { + tmp_uint = htonl(skb->mark); NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); } if (indev && skb->dev && skb->dev->hard_header_parse) { struct nfulnl_msg_packet_hw phw; - - phw.hw_addrlen = - skb->dev->hard_header_parse((struct sk_buff *)skb, + int len = skb->dev->hard_header_parse((struct sk_buff *)skb, phw.hw_addr); - phw.hw_addrlen = htons(phw.hw_addrlen); + phw.hw_addrlen = htons(len); NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); } @@ -529,7 +523,7 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid); + __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); /* need to unlock here since NFA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); @@ -544,7 +538,7 @@ __build_packet_message(struct nfulnl_instance *inst, } /* global sequence number */ if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) { - tmp_uint = atomic_inc_return(&global_seq); + tmp_uint = htonl(atomic_inc_return(&global_seq)); NFA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); } @@ -603,6 +597,7 @@ nfulnl_log_packet(unsigned int pf, const struct nf_loginfo *li; unsigned int qthreshold; unsigned int nlbufsiz; + unsigned int plen; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; @@ -618,6 +613,10 @@ nfulnl_log_packet(unsigned int pf, return; } + plen = 0; + if (prefix) + plen = strlen(prefix); + /* all macros expand to constant values at compile time */ /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more @@ -632,7 +631,7 @@ nfulnl_log_packet(unsigned int pf, #endif + NFA_SPACE(sizeof(u_int32_t)) /* mark */ + NFA_SPACE(sizeof(u_int32_t)) /* uid */ - + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */ + + NFA_SPACE(plen) /* prefix */ + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw)) + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp)); @@ -703,7 +702,7 @@ nfulnl_log_packet(unsigned int pf, inst->qlen++; __build_packet_message(inst, skb, data_len, pf, - hooknum, in, out, li, prefix); + hooknum, in, out, li, prefix, plen); /* timer_pending always called within inst->lock, so there * is no chance of a race here */ @@ -878,33 +877,33 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); nfulnl_set_mode(inst, params->copy_mode, - ntohs(params->copy_range)); + ntohl(params->copy_range)); } if (nfula[NFULA_CFG_TIMEOUT-1]) { - u_int32_t timeout = - *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); + __be32 timeout = + *(__be32 *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); nfulnl_set_timeout(inst, ntohl(timeout)); } if (nfula[NFULA_CFG_NLBUFSIZ-1]) { - u_int32_t nlbufsiz = - *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); + __be32 nlbufsiz = + *(__be32 *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); } if (nfula[NFULA_CFG_QTHRESH-1]) { - u_int32_t qthresh = - *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); + __be32 qthresh = + *(__be32 *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); nfulnl_set_qthresh(inst, ntohl(qthresh)); } if (nfula[NFULA_CFG_FLAGS-1]) { - u_int16_t flags = - *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]); + __be16 flags = + *(__be16 *)NFA_DATA(nfula[NFULA_CFG_FLAGS-1]); nfulnl_set_flags(inst, ntohs(flags)); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8eb2473d83e..a88a017da22 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -349,7 +349,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; - unsigned int tmp_uint; + __be32 tmp_uint; QDEBUG("entered\n"); @@ -414,7 +414,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfmsg->res_id = htons(queue->queue_num); pmsg.packet_id = htonl(entry->id); - pmsg.hw_protocol = htons(entskb->protocol); + pmsg.hw_protocol = entskb->protocol; pmsg.hook = entinf->hook; NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); @@ -480,8 +480,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif } - if (entskb->nfmark) { - tmp_uint = htonl(entskb->nfmark); + if (entskb->mark) { + tmp_uint = htonl(entskb->mark); NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); } @@ -489,10 +489,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, && entskb->dev->hard_header_parse) { struct nfqnl_msg_packet_hw phw; - phw.hw_addrlen = - entskb->dev->hard_header_parse(entskb, + int len = entskb->dev->hard_header_parse(entskb, phw.hw_addr); - phw.hw_addrlen = htons(phw.hw_addrlen); + phw.hw_addrlen = htons(len); NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } @@ -622,9 +621,10 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) int diff; diff = data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, data_len); - else if (diff > 0) { + if (diff < 0) { + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { @@ -834,8 +834,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, } if (nfqa[NFQA_MARK-1]) - entry->skb->nfmark = ntohl(*(u_int32_t *) - NFA_DATA(nfqa[NFQA_MARK-1])); + entry->skb->mark = ntohl(*(__be32 *) + NFA_DATA(nfqa[NFQA_MARK-1])); issue_verdict(entry, verdict); instance_put(queue); @@ -947,6 +947,14 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ntohl(params->copy_range)); } + if (nfqa[NFQA_CFG_QUEUE_MAXLEN-1]) { + __be32 *queue_maxlen; + queue_maxlen = NFA_DATA(nfqa[NFQA_CFG_QUEUE_MAXLEN-1]); + spin_lock_bh(&queue->lock); + queue->queue_maxlen = ntohl(*queue_maxlen); + spin_unlock_bh(&queue->lock); + } + out_put: instance_put(queue); return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 58522fc65d3..8996584b849 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mutex.h> +#include <linux/mm.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index c01524f817f..0534bfa65cc 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -31,6 +31,9 @@ MODULE_ALIAS("ipt_CONNMARK"); #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CONNMARK.h> #include <net/netfilter/nf_conntrack_compat.h> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include <net/netfilter/nf_conntrack_ecache.h> +#endif static unsigned int target(struct sk_buff **pskb, @@ -42,7 +45,7 @@ target(struct sk_buff **pskb, { const struct xt_connmark_target_info *markinfo = targinfo; u_int32_t diff; - u_int32_t nfmark; + u_int32_t mark; u_int32_t newmark; u_int32_t ctinfo; u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); @@ -62,7 +65,7 @@ target(struct sk_buff **pskb, break; case XT_CONNMARK_SAVE: newmark = (*ctmark & ~markinfo->mask) | - ((*pskb)->nfmark & markinfo->mask); + ((*pskb)->mark & markinfo->mask); if (*ctmark != newmark) { *ctmark = newmark; #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) @@ -73,10 +76,10 @@ target(struct sk_buff **pskb, } break; case XT_CONNMARK_RESTORE: - nfmark = (*pskb)->nfmark; - diff = (*ctmark ^ nfmark) & markinfo->mask; + mark = (*pskb)->mark; + diff = (*ctmark ^ mark) & markinfo->mask; if (diff != 0) - (*pskb)->nfmark = nfmark ^ diff; + (*pskb)->mark = mark ^ diff; break; } } @@ -93,6 +96,11 @@ checkentry(const char *tablename, { struct xt_connmark_target_info *matchinfo = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " @@ -108,6 +116,12 @@ checkentry(const char *tablename, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + #ifdef CONFIG_COMPAT struct compat_xt_connmark_target_info { compat_ulong_t mark, mask; @@ -144,6 +158,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), #ifdef CONFIG_COMPAT @@ -157,6 +172,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), .me = THIS_MODULE @@ -165,7 +181,6 @@ static struct xt_target xt_connmark_target[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connmark_target, ARRAY_SIZE(xt_connmark_target)); } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 46738626667..a3fe3c334b0 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -93,6 +93,11 @@ static int checkentry(const char *tablename, const void *entry, { struct xt_connsecmark_target_info *info = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -106,11 +111,18 @@ static int checkentry(const char *tablename, const void *entry, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + static struct xt_target xt_connsecmark_target[] = { { .name = "CONNSECMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -120,6 +132,7 @@ static struct xt_target xt_connsecmark_target[] = { .name = "CONNSECMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -129,7 +142,6 @@ static struct xt_target xt_connsecmark_target[] = { static int __init xt_connsecmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connsecmark_target, ARRAY_SIZE(xt_connsecmark_target)); } diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index c6e860a7114..0b48547e8d6 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -31,8 +31,8 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) - (*pskb)->nfmark = markinfo->mark; + if((*pskb)->mark != markinfo->mark) + (*pskb)->mark = markinfo->mark; return XT_CONTINUE; } @@ -54,16 +54,16 @@ target_v1(struct sk_buff **pskb, break; case XT_MARK_AND: - mark = (*pskb)->nfmark & markinfo->mark; + mark = (*pskb)->mark & markinfo->mark; break; case XT_MARK_OR: - mark = (*pskb)->nfmark | markinfo->mark; + mark = (*pskb)->mark | markinfo->mark; break; } - if((*pskb)->nfmark != mark) - (*pskb)->nfmark = mark; + if((*pskb)->mark != mark) + (*pskb)->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c new file mode 100644 index 00000000000..901ed7abaa1 --- /dev/null +++ b/net/netfilter/xt_NFLOG.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_NFLOG.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("x_tables NFLOG target"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_NFLOG"); +MODULE_ALIAS("ip6t_NFLOG"); + +static unsigned int +nflog_target(struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + unsigned int hooknum, const struct xt_target *target, + const void *targinfo) +{ + const struct xt_nflog_info *info = targinfo; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_ULOG; + li.u.ulog.copy_len = info->len; + li.u.ulog.group = info->group; + li.u.ulog.qthreshold = info->threshold; + + nf_log_packet(target->family, hooknum, *pskb, in, out, &li, + "%s", info->prefix); + return XT_CONTINUE; +} + +static int +nflog_checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targetinfo, + unsigned int hookmask) +{ + struct xt_nflog_info *info = targetinfo; + + if (info->flags & ~XT_NFLOG_MASK) + return 0; + if (info->prefix[sizeof(info->prefix) - 1] != '\0') + return 0; + return 1; +} + +static struct xt_target xt_nflog_target[] = { + { + .name = "NFLOG", + .family = AF_INET, + .checkentry = nflog_checkentry, + .target = nflog_target, + .targetsize = sizeof(struct xt_nflog_info), + .me = THIS_MODULE, + }, + { + .name = "NFLOG", + .family = AF_INET6, + .checkentry = nflog_checkentry, + .target = nflog_target, + .targetsize = sizeof(struct xt_nflog_info), + .me = THIS_MODULE, + }, +}; + +static int __init xt_nflog_init(void) +{ + return xt_register_targets(xt_nflog_target, + ARRAY_SIZE(xt_nflog_target)); +} + +static void __exit xt_nflog_fini(void) +{ + xt_unregister_targets(xt_nflog_target, ARRAY_SIZE(xt_nflog_target)); +} + +module_init(xt_nflog_init); +module_exit(xt_nflog_fini); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index dcc497ea818..d93cb096a67 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -139,15 +139,28 @@ static int check(const char *tablename, sinfo->direction != XT_CONNBYTES_DIR_BOTH) return 0; + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", match->family); + return 0; + } + return 1; } +static void +destroy(const struct xt_match *match, void *matchinfo) +{ + nf_ct_l3proto_module_put(match->family); +} + static struct xt_match xt_connbytes_match[] = { { .name = "connbytes", .family = AF_INET, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, @@ -156,6 +169,7 @@ static struct xt_match xt_connbytes_match[] = { .family = AF_INET6, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index a8f03057dbd..36c2defff23 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -63,22 +63,18 @@ checkentry(const char *tablename, printk(KERN_WARNING "connmark: only support 32bit mark\n"); return 0; } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } #ifdef CONFIG_COMPAT @@ -140,7 +136,6 @@ static struct xt_match xt_connmark_match[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0ea501a2fda..3dc2357b8de 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -20,6 +20,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_conntrack.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); @@ -228,21 +229,17 @@ checkentry(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match conntrack_match = { @@ -257,7 +254,6 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { - need_conntrack(); return xt_register_match(&conntrack_match); } diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 33ccdbf8e79..a5a6e192ac2 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -6,23 +6,8 @@ * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $ * * Development of this code was funded by Astaro AG, http://www.astaro.com/ - * - * based on ipt_limit.c by: - * Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> - * Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> - * Rusty Russell <rusty@rustcorp.com.au> - * - * The general idea is to create a hash table for every dstip and have a - * seperate limit counter per tuple. This way you can do something like 'limit - * the number of syn packets for each of my internal addresses. - * - * Ideally this would just be implemented as a general 'hash' match, which would - * allow us to attach any iptables target to it's hash buckets. But this is - * not possible in the current iptables architecture. As always, pkttables for - * 2.7.x will help ;) */ #include <linux/module.h> -#include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/random.h> #include <linux/jhash.h> @@ -31,28 +16,41 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/mm.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_hashlimit.h> - -/* FIXME: this is just for IP_NF_ASSERRT */ -#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/xt_hashlimit.h> #include <linux/mutex.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables match for limiting per hash-bucket"); +MODULE_ALIAS("ipt_hashlimit"); +MODULE_ALIAS("ip6t_hashlimit"); /* need to declare this at the top */ -static struct proc_dir_entry *hashlimit_procdir; +static struct proc_dir_entry *hashlimit_procdir4; +static struct proc_dir_entry *hashlimit_procdir6; static struct file_operations dl_file_ops; /* hash table crap */ - struct dsthash_dst { - __be32 src_ip; - __be32 dst_ip; - /* ports have to be consecutive !!! */ + union { + struct { + __be32 src; + __be32 dst; + } ip; + struct { + __be32 src[4]; + __be32 dst[4]; + } ip6; + } addr; __be16 src_port; __be16 dst_port; }; @@ -71,9 +69,10 @@ struct dsthash_ent { } rateinfo; }; -struct ipt_hashlimit_htable { +struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ atomic_t use; + int family; struct hashlimit_cfg cfg; /* config */ @@ -81,8 +80,8 @@ struct ipt_hashlimit_htable { spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ int rnd_initialized; + unsigned int count; /* number entries in table */ struct timer_list timer; /* timer for gc */ - atomic_t count; /* number entries in table */ /* seq_file stuff */ struct proc_dir_entry *pde; @@ -93,45 +92,37 @@ struct ipt_hashlimit_htable { static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep __read_mostly; +static struct kmem_cache *hashlimit_cachep __read_mostly; static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) { - return (ent->dst.dst_ip == b->dst_ip - && ent->dst.dst_port == b->dst_port - && ent->dst.src_port == b->src_port - && ent->dst.src_ip == b->src_ip); + return !memcmp(&ent->dst, b, sizeof(ent->dst)); } -static inline u_int32_t -hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst) +static u_int32_t +hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { - return (jhash_3words((__force u32)dst->dst_ip, - ((__force u32)dst->dst_port<<16 | - (__force u32)dst->src_port), - (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size); + return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size; } -static inline struct dsthash_ent * -__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) +static struct dsthash_ent * +dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) { struct dsthash_ent *ent; struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); - if (!hlist_empty(&ht->hash[hash])) - hlist_for_each_entry(ent, pos, &ht->hash[hash], node) { - if (dst_cmp(ent, dst)) { + if (!hlist_empty(&ht->hash[hash])) { + hlist_for_each_entry(ent, pos, &ht->hash[hash], node) + if (dst_cmp(ent, dst)) return ent; - } - } - + } return NULL; } /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * -__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) +dsthash_alloc_init(struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) { struct dsthash_ent *ent; @@ -142,12 +133,11 @@ __dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) ht->rnd_initialized = 1; } - if (ht->cfg.max && - atomic_read(&ht->count) >= ht->cfg.max) { + if (ht->cfg.max && ht->count >= ht->cfg.max) { /* FIXME: do something. question is what.. */ if (net_ratelimit()) - printk(KERN_WARNING - "ipt_hashlimit: max count of %u reached\n", + printk(KERN_WARNING + "xt_hashlimit: max count of %u reached\n", ht->cfg.max); return NULL; } @@ -155,53 +145,47 @@ __dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); if (!ent) { if (net_ratelimit()) - printk(KERN_ERR - "ipt_hashlimit: can't allocate dsthash_ent\n"); + printk(KERN_ERR + "xt_hashlimit: can't allocate dsthash_ent\n"); return NULL; } - - atomic_inc(&ht->count); - - ent->dst.dst_ip = dst->dst_ip; - ent->dst.dst_port = dst->dst_port; - ent->dst.src_ip = dst->src_ip; - ent->dst.src_port = dst->src_port; + memcpy(&ent->dst, dst, sizeof(ent->dst)); hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); - + ht->count++; return ent; } -static inline void -__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent) +static inline void +dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) { hlist_del(&ent->node); kmem_cache_free(hashlimit_cachep, ent); - atomic_dec(&ht->count); + ht->count--; } static void htable_gc(unsigned long htlong); -static int htable_create(struct ipt_hashlimit_info *minfo) +static int htable_create(struct xt_hashlimit_info *minfo, int family) { - int i; + struct xt_hashlimit_htable *hinfo; unsigned int size; - struct ipt_hashlimit_htable *hinfo; + unsigned int i; if (minfo->cfg.size) size = minfo->cfg.size; else { - size = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct list_head)); + size = ((num_physpages << PAGE_SHIFT) / 16384) / + sizeof(struct list_head); if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) size = 8192; if (size < 16) size = 16; } /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct ipt_hashlimit_htable) - + (sizeof(struct list_head) * size)); + hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + + sizeof(struct list_head) * size); if (!hinfo) { - printk(KERN_ERR "ipt_hashlimit: Unable to create hashtable\n"); + printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); return -1; } minfo->hinfo = hinfo; @@ -217,11 +201,14 @@ static int htable_create(struct ipt_hashlimit_info *minfo) for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - atomic_set(&hinfo->count, 0); atomic_set(&hinfo->use, 1); + hinfo->count = 0; + hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir); + hinfo->pde = create_proc_entry(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6); if (!hinfo->pde) { vfree(hinfo); return -1; @@ -242,23 +229,21 @@ static int htable_create(struct ipt_hashlimit_info *minfo) return 0; } -static int select_all(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he) +static int select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) { return 1; } -static int select_gc(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he) +static int select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) { return (jiffies >= he->expires); } -static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht, - int (*select)(struct ipt_hashlimit_htable *ht, +static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, + int (*select)(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)) { - int i; - - IP_NF_ASSERT(ht->cfg.size && ht->cfg.max); + unsigned int i; /* lock hash table and iterate over it */ spin_lock_bh(&ht->lock); @@ -267,7 +252,7 @@ static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht, struct hlist_node *pos, *n; hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { if ((*select)(ht, dh)) - __dsthash_free(ht, dh); + dsthash_free(ht, dh); } } spin_unlock_bh(&ht->lock); @@ -276,7 +261,7 @@ static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht, /* hash table garbage collector, run by timer */ static void htable_gc(unsigned long htlong) { - struct ipt_hashlimit_htable *ht = (struct ipt_hashlimit_htable *)htlong; + struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong; htable_selective_cleanup(ht, select_gc); @@ -285,38 +270,39 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct ipt_hashlimit_htable *hinfo) +static void htable_destroy(struct xt_hashlimit_htable *hinfo) { /* remove timer, if it is pending */ if (timer_pending(&hinfo->timer)) del_timer(&hinfo->timer); /* remove proc entry */ - remove_proc_entry(hinfo->pde->name, hashlimit_procdir); - + remove_proc_entry(hinfo->pde->name, + hinfo->family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6); htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } -static struct ipt_hashlimit_htable *htable_find_get(char *name) +static struct xt_hashlimit_htable *htable_find_get(char *name, int family) { - struct ipt_hashlimit_htable *hinfo; + struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; spin_lock_bh(&hashlimit_lock); hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) { - if (!strcmp(name, hinfo->pde->name)) { + if (!strcmp(name, hinfo->pde->name) && + hinfo->family == family) { atomic_inc(&hinfo->use); spin_unlock_bh(&hashlimit_lock); return hinfo; } } spin_unlock_bh(&hashlimit_lock); - return NULL; } -static void htable_put(struct ipt_hashlimit_htable *hinfo) +static void htable_put(struct xt_hashlimit_htable *hinfo) { if (atomic_dec_and_test(&hinfo->use)) { spin_lock_bh(&hashlimit_lock); @@ -326,7 +312,6 @@ static void htable_put(struct ipt_hashlimit_htable *hinfo) } } - /* The algorithm used is the Simple Token Bucket Filter (TBF) * see net/sched/sch_tbf.c in the linux source tree */ @@ -370,17 +355,82 @@ user2credits(u_int32_t user) /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) /* Divide first. */ - return (user / IPT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; - return (user * HZ * CREDITS_PER_JIFFY) / IPT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) { - dh->rateinfo.credit += (now - xchg(&dh->rateinfo.prev, now)) - * CREDITS_PER_JIFFY; + dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; if (dh->rateinfo.credit > dh->rateinfo.credit_cap) dh->rateinfo.credit = dh->rateinfo.credit_cap; + dh->rateinfo.prev = now; +} + +static int +hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, + const struct sk_buff *skb, unsigned int protoff) +{ + __be16 _ports[2], *ports; + int nexthdr; + + memset(dst, 0, sizeof(*dst)); + + switch (hinfo->family) { + case AF_INET: + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) + dst->addr.ip.dst = skb->nh.iph->daddr; + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) + dst->addr.ip.src = skb->nh.iph->saddr; + + if (!(hinfo->cfg.mode & + (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) + return 0; + nexthdr = skb->nh.iph->protocol; + break; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + case AF_INET6: + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) + memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr, + sizeof(dst->addr.ip6.dst)); + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) + memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr, + sizeof(dst->addr.ip6.src)); + + if (!(hinfo->cfg.mode & + (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) + return 0; + nexthdr = ipv6_find_hdr(skb, &protoff, -1, NULL); + if (nexthdr < 0) + return -1; + break; +#endif + default: + BUG(); + return 0; + } + + switch (nexthdr) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + ports = skb_header_pointer(skb, protoff, sizeof(_ports), + &_ports); + break; + default: + _ports[0] = _ports[1] = 0; + ports = _ports; + break; + } + if (!ports) + return -1; + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT) + dst->src_port = ports[0]; + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT) + dst->dst_port = ports[1]; + return 0; } static int @@ -393,68 +443,31 @@ hashlimit_match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipt_hashlimit_info *r = - ((struct ipt_hashlimit_info *)matchinfo)->u.master; - struct ipt_hashlimit_htable *hinfo = r->hinfo; + struct xt_hashlimit_info *r = + ((struct xt_hashlimit_info *)matchinfo)->u.master; + struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; - /* build 'dst' according to hinfo->cfg and current packet */ - memset(&dst, 0, sizeof(dst)); - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DIP) - dst.dst_ip = skb->nh.iph->daddr; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SIP) - dst.src_ip = skb->nh.iph->saddr; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT - ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - __be16 _ports[2], *ports; - - switch (skb->nh.iph->protocol) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - ports = skb_header_pointer(skb, skb->nh.iph->ihl*4, - sizeof(_ports), &_ports); - break; - default: - _ports[0] = _ports[1] = 0; - ports = _ports; - break; - } - if (!ports) { - /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ - *hotdrop = 1; - return 0; - } - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) - dst.src_port = ports[0]; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT) - dst.dst_port = ports[1]; - } + if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + goto hotdrop; spin_lock_bh(&hinfo->lock); - dh = __dsthash_find(hinfo, &dst); + dh = dsthash_find(hinfo, &dst); if (!dh) { - dh = __dsthash_alloc_init(hinfo, &dst); - + dh = dsthash_alloc_init(hinfo, &dst); if (!dh) { - /* enomem... don't match == DROP */ - if (net_ratelimit()) - printk(KERN_ERR "%s: ENOMEM\n", __FUNCTION__); spin_unlock_bh(&hinfo->lock); - return 0; + goto hotdrop; } dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); dh->rateinfo.cost = user2credits(hinfo->cfg.avg); } else { /* update expiration timeout */ @@ -473,6 +486,10 @@ hashlimit_match(const struct sk_buff *skb, /* default case: we're overlimit, thus don't match */ return 0; + +hotdrop: + *hotdrop = 1; + return 0; } static int @@ -482,42 +499,37 @@ hashlimit_checkentry(const char *tablename, void *matchinfo, unsigned int hook_mask) { - struct ipt_hashlimit_info *r = matchinfo; + struct xt_hashlimit_info *r = matchinfo; /* Check for overflow. */ - if (r->cfg.burst == 0 - || user2credits(r->cfg.avg * r->cfg.burst) < - user2credits(r->cfg.avg)) { - printk(KERN_ERR "ipt_hashlimit: Overflow, try lower: %u/%u\n", + if (r->cfg.burst == 0 || + user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { + printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", r->cfg.avg, r->cfg.burst); return 0; } - - if (r->cfg.mode == 0 - || r->cfg.mode > (IPT_HASHLIMIT_HASH_DPT - |IPT_HASHLIMIT_HASH_DIP - |IPT_HASHLIMIT_HASH_SIP - |IPT_HASHLIMIT_HASH_SPT)) + if (r->cfg.mode == 0 || + r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | + XT_HASHLIMIT_HASH_DIP | + XT_HASHLIMIT_HASH_SIP | + XT_HASHLIMIT_HASH_SPT)) return 0; - if (!r->cfg.gc_interval) return 0; - if (!r->cfg.expire) return 0; - if (r->name[sizeof(r->name) - 1] != '\0') return 0; /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before ip_tables.c grabs ipt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will + * since checkentry() is called before x_tables.c grabs xt_mutex. + * We also cannot grab the hashtable spinlock, since htable_create will * call vmalloc, and that can sleep. And we cannot just re-search * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name); - if (!r->hinfo && (htable_create(r) != 0)) { + r->hinfo = htable_find_get(r->name, match->family); + if (!r->hinfo && htable_create(r, match->family) != 0) { mutex_unlock(&hlimit_mutex); return 0; } @@ -525,20 +537,19 @@ hashlimit_checkentry(const char *tablename, /* Ugly hack: For SMP, we only want to use one set */ r->u.master = r; - return 1; } static void hashlimit_destroy(const struct xt_match *match, void *matchinfo) { - struct ipt_hashlimit_info *r = matchinfo; + struct xt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } #ifdef CONFIG_COMPAT -struct compat_ipt_hashlimit_info { +struct compat_xt_hashlimit_info { char name[IFNAMSIZ]; struct hashlimit_cfg cfg; compat_uptr_t hinfo; @@ -547,40 +558,56 @@ struct compat_ipt_hashlimit_info { static void compat_from_user(void *dst, void *src) { - int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + int off = offsetof(struct compat_xt_hashlimit_info, hinfo); memcpy(dst, src, off); - memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); + memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); } static int compat_to_user(void __user *dst, void *src) { - int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + int off = offsetof(struct compat_xt_hashlimit_info, hinfo); return copy_to_user(dst, src, off) ? -EFAULT : 0; } #endif -static struct ipt_match ipt_hashlimit = { - .name = "hashlimit", - .match = hashlimit_match, - .matchsize = sizeof(struct ipt_hashlimit_info), +static struct xt_match xt_hashlimit[] = { + { + .name = "hashlimit", + .family = AF_INET, + .match = hashlimit_match, + .matchsize = sizeof(struct xt_hashlimit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_hashlimit_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif + .checkentry = hashlimit_checkentry, + .destroy = hashlimit_destroy, + .me = THIS_MODULE + }, + { + .name = "hashlimit", + .family = AF_INET6, + .match = hashlimit_match, + .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_ipt_hashlimit_info), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, + .compatsize = sizeof(struct compat_xt_hashlimit_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, #endif - .checkentry = hashlimit_checkentry, - .destroy = hashlimit_destroy, - .me = THIS_MODULE + .checkentry = hashlimit_checkentry, + .destroy = hashlimit_destroy, + .me = THIS_MODULE + }, }; /* PROC stuff */ - static void *dl_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = pde->data; unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -598,7 +625,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = pde->data; unsigned int *bucket = (unsigned int *)v; *pos = ++(*bucket); @@ -612,43 +639,59 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) { struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = pde->data; unsigned int *bucket = (unsigned int *)v; kfree(bucket); - spin_unlock_bh(&htable->lock); } -static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s) +static int dl_seq_real_show(struct dsthash_ent *ent, int family, + struct seq_file *s) { /* recalculate to show accurate numbers */ rateinfo_recalc(ent, jiffies); - return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n", - (long)(ent->expires - jiffies)/HZ, - NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port), - NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port), - ent->rateinfo.credit, ent->rateinfo.credit_cap, - ent->rateinfo.cost); + switch (family) { + case AF_INET: + return seq_printf(s, "%ld %u.%u.%u.%u:%u->" + "%u.%u.%u.%u:%u %u %u %u\n", + (long)(ent->expires - jiffies)/HZ, + NIPQUAD(ent->dst.addr.ip.src), + ntohs(ent->dst.src_port), + NIPQUAD(ent->dst.addr.ip.dst), + ntohs(ent->dst.dst_port), + ent->rateinfo.credit, ent->rateinfo.credit_cap, + ent->rateinfo.cost); + case AF_INET6: + return seq_printf(s, "%ld " NIP6_FMT ":%u->" + NIP6_FMT ":%u %u %u %u\n", + (long)(ent->expires - jiffies)/HZ, + NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.src), + ntohs(ent->dst.src_port), + NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.dst), + ntohs(ent->dst.dst_port), + ent->rateinfo.credit, ent->rateinfo.credit_cap, + ent->rateinfo.cost); + default: + BUG(); + return 0; + } } static int dl_seq_show(struct seq_file *s, void *v) { struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = pde->data; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; struct hlist_node *pos; - if (!hlist_empty(&htable->hash[*bucket])) - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) { - if (dl_seq_real_show(ent, s)) { - /* buffer was filled and unable to print that tuple */ + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + if (dl_seq_real_show(ent, htable->family, s)) return 1; - } - } - + } return 0; } @@ -678,56 +721,53 @@ static struct file_operations dl_file_ops = { .release = seq_release }; -static int init_or_fini(int fini) +static int __init xt_hashlimit_init(void) { - int ret = 0; - - if (fini) - goto cleanup; + int err; - if (ipt_register_match(&ipt_hashlimit)) { - ret = -EINVAL; - goto cleanup_nothing; - } + err = xt_register_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); + if (err < 0) + goto err1; - hashlimit_cachep = kmem_cache_create("ipt_hashlimit", - sizeof(struct dsthash_ent), 0, - 0, NULL, NULL); + err = -ENOMEM; + hashlimit_cachep = kmem_cache_create("xt_hashlimit", + sizeof(struct dsthash_ent), 0, 0, + NULL, NULL); if (!hashlimit_cachep) { - printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n"); - ret = -ENOMEM; - goto cleanup_unreg_match; + printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); + goto err2; } - - hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net); - if (!hashlimit_procdir) { - printk(KERN_ERR "Unable to create proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_free_slab; + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + if (!hashlimit_procdir4) { + printk(KERN_ERR "xt_hashlimit: unable to create proc dir " + "entry\n"); + goto err3; } - - return ret; - -cleanup: + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + if (!hashlimit_procdir6) { + printk(KERN_ERR "xt_hashlimit: tnable to create proc dir " + "entry\n"); + goto err4; + } + return 0; +err4: remove_proc_entry("ipt_hashlimit", proc_net); -cleanup_free_slab: +err3: kmem_cache_destroy(hashlimit_cachep); -cleanup_unreg_match: - ipt_unregister_match(&ipt_hashlimit); -cleanup_nothing: - return ret; - -} +err2: + xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); +err1: + return err; -static int __init ipt_hashlimit_init(void) -{ - return init_or_fini(0); } -static void __exit ipt_hashlimit_fini(void) +static void __exit xt_hashlimit_fini(void) { - init_or_fini(1); + remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ip6t_hashlimit", proc_net); + kmem_cache_destroy(hashlimit_cachep); + xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } -module_init(ipt_hashlimit_init); -module_exit(ipt_hashlimit_fini); +module_init(xt_hashlimit_init); +module_exit(xt_hashlimit_fini); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 5d7818b73e3..04bc32ba719 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,6 +24,7 @@ #endif #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_helper.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); @@ -143,13 +144,11 @@ static int check(const char *tablename, { struct xt_helper_info *info = matchinfo; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif info->name[29] = '\0'; return 1; } @@ -157,9 +156,7 @@ static int check(const char *tablename, static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_helper_match[] = { @@ -185,7 +182,6 @@ static struct xt_match xt_helper_match[] = { static int __init xt_helper_init(void) { - need_conntrack(); return xt_register_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); } diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 934dddfbcd2..dfa1ee6914c 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -31,7 +31,7 @@ match(const struct sk_buff *skb, { const struct xt_mark_info *info = matchinfo; - return ((skb->nfmark & info->mask) == info->mark) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; } static int diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index d3aefd38093..1602086c7fd 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -1,5 +1,5 @@ -/* Kernel module to match one of a list of TCP/UDP/SCTP/DCCP ports: ports are in - the same place so we can treat them as equal. */ +/* Kernel module to match one of a list of TCP/UDP(-Lite)/SCTP/DCCP ports: + ports are in the same place so we can treat them as equal. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> @@ -104,7 +104,7 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; const struct xt_multiport *multiinfo = matchinfo; if (offset) @@ -135,7 +135,7 @@ match_v1(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; const struct xt_multiport_v1 *multiinfo = matchinfo; if (offset) @@ -162,6 +162,7 @@ check(u_int16_t proto, { /* Must specify supported protocol, no unknown flags or bad count */ return (proto == IPPROTO_TCP || proto == IPPROTO_UDP + || proto == IPPROTO_UDPLITE || proto == IPPROTO_SCTP || proto == IPPROTO_DCCP) && !(ip_invflags & XT_INV_PROTO) && (match_flags == XT_MULTIPORT_SOURCE diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index fd8f954cded..b9b3ffc5451 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -113,20 +113,16 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return 0; - if (brnf_deferred_hooks == 0 && - info->bitmask & XT_PHYSDEV_OP_OUT && + if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is deprecated and breaks other things, it will " - "be removed in January 2007. See Documentation/" - "feature-removal-schedule.txt for details. This doesn't " - "affect you in case you're using it for purely bridged " - "traffic.\n"); - brnf_deferred_hooks = 1; + "traffic is not supported anymore.\n"); + if (hook_mask & (1 << NF_IP_LOCAL_OUT)) + return 0; } return 1; } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 7956acaaa24..71bf036f833 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -71,7 +71,7 @@ match_packet(const struct sk_buff *skb, duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", ++i, offset, sch->type, htons(sch->length), sch->flags); - offset += (htons(sch->length) + 3) & ~3; + offset += (ntohs(sch->length) + 3) & ~3; duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index d9010b16a1f..df37b912163 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -50,22 +50,18 @@ static int check(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_state_match[] = { @@ -91,7 +87,6 @@ static struct xt_match xt_state_match[] = { static int __init xt_state_init(void) { - need_conntrack(); return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index e76a68e0bc6..46414b562a1 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -10,7 +10,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -MODULE_DESCRIPTION("x_tables match for TCP and UDP, supports IPv4 and IPv6"); +MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xt_tcp"); MODULE_ALIAS("xt_udp"); @@ -234,6 +234,24 @@ static struct xt_match xt_tcpudp_match[] = { .proto = IPPROTO_UDP, .me = THIS_MODULE, }, + { + .name = "udplite", + .family = AF_INET, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDPLITE, + .me = THIS_MODULE, + }, + { + .name = "udplite", + .family = AF_INET6, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDPLITE, + .me = THIS_MODULE, + }, }; static int __init xt_tcpudp_init(void) diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index 9f7121ae13e..56958c85f2b 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -4,7 +4,7 @@ config NETLABEL bool "NetLabel subsystem support" - depends on NET && SECURITY + depends on SECURITY default n ---help--- NetLabel provides support for explicit network packet labeling diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index a6ce1d6d5c5..743b05734a4 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -407,12 +407,14 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, &audit_info); - audit_log_format(audit_buf, - " cipso_doi=%u cipso_type=%s res=%u", - doi, - type_str, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); + if (audit_buf != NULL) { + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } return ret_val; } @@ -452,17 +454,13 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) } list_start: - ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE * nlsze_mult, GFP_KERNEL); if (ans_skb == NULL) { ret_val = -ENOMEM; goto list_failure; } - data = netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - info->snd_seq, - netlbl_cipsov4_gnl_family.id, - 0, - NLBL_CIPSOV4_C_LIST); + data = genlmsg_put_reply(ans_skb, info, &netlbl_cipsov4_gnl_family, + 0, NLBL_CIPSOV4_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; @@ -568,7 +566,7 @@ list_start: genlmsg_end(ans_skb, data); - ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + ret_val = genlmsg_reply(ans_skb, info); if (ret_val != 0) goto list_failure; @@ -607,12 +605,9 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = netlbl_netlink_hdr_put(cb_arg->skb, - NETLINK_CB(cb_arg->nl_cb->skb).pid, - cb_arg->seq, - netlbl_cipsov4_gnl_family.id, - NLM_F_MULTI, - NLBL_CIPSOV4_C_LISTALL); + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, &netlbl_cipsov4_gnl_family, + NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) goto listall_cb_failure; @@ -687,11 +682,13 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, &audit_info); - audit_log_format(audit_buf, - " cipso_doi=%u res=%u", - doi, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); + if (audit_buf != NULL) { + audit_log_format(audit_buf, + " cipso_doi=%u res=%u", + doi, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index af4371d3b45..f46a0aeec44 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -202,7 +202,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, int ret_val; u32 bkt; struct audit_buffer *audit_buf; - char *audit_domain; switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: @@ -243,24 +242,24 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } else ret_val = -EINVAL; - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); - audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " nlbl_protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: + if (audit_buf != NULL) { audit_log_format(audit_buf, - " nlbl_protocol=cipsov4 cipso_doi=%u", - entry->type_def.cipsov4->doi); - break; + " nlbl_domain=%s", + entry->domain ? entry->domain : "(default)"); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + entry->type_def.cipsov4->doi); + break; + } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); rcu_read_unlock(); @@ -310,7 +309,6 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) int ret_val = -ENOENT; struct netlbl_dom_map *entry; struct audit_buffer *audit_buf; - char *audit_domain; rcu_read_lock(); if (domain != NULL) @@ -348,16 +346,14 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) spin_unlock(&netlbl_domhsh_def_lock); } - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); - audit_log_format(audit_buf, - " nlbl_domain=%s res=%u", - audit_domain, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); + if (audit_buf != NULL) { + audit_log_format(audit_buf, + " nlbl_domain=%s res=%u", + entry->domain ? entry->domain : "(default)", + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } if (ret_val == 0) call_rcu(&entry->rcu, netlbl_domhsh_free_entry); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index ff971103fd0..e03a3282c55 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -40,6 +40,207 @@ #include "netlabel_user.h" /* + * Security Attribute Functions + */ + +/** + * netlbl_secattr_catmap_walk - Walk a LSM secattr catmap looking for a bit + * @catmap: the category bitmap + * @offset: the offset to start searching at, in bits + * + * Description: + * This function walks a LSM secattr category bitmap starting at @offset and + * returns the spot of the first set bit or -ENOENT if no bits are set. + * + */ +int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap, + u32 offset) +{ + struct netlbl_lsm_secattr_catmap *iter = catmap; + u32 node_idx; + u32 node_bit; + NETLBL_CATMAP_MAPTYPE bitmap; + + if (offset > iter->startbit) { + while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) { + iter = iter->next; + if (iter == NULL) + return -ENOENT; + } + node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE; + node_bit = offset - iter->startbit - + (NETLBL_CATMAP_MAPSIZE * node_idx); + } else { + node_idx = 0; + node_bit = 0; + } + bitmap = iter->bitmap[node_idx] >> node_bit; + + for (;;) { + if (bitmap != 0) { + while ((bitmap & NETLBL_CATMAP_BIT) == 0) { + bitmap >>= 1; + node_bit++; + } + return iter->startbit + + (NETLBL_CATMAP_MAPSIZE * node_idx) + node_bit; + } + if (++node_idx >= NETLBL_CATMAP_MAPCNT) { + if (iter->next != NULL) { + iter = iter->next; + node_idx = 0; + } else + return -ENOENT; + } + bitmap = iter->bitmap[node_idx]; + node_bit = 0; + } + + return -ENOENT; +} + +/** + * netlbl_secattr_catmap_walk_rng - Find the end of a string of set bits + * @catmap: the category bitmap + * @offset: the offset to start searching at, in bits + * + * Description: + * This function walks a LSM secattr category bitmap starting at @offset and + * returns the spot of the first cleared bit or -ENOENT if the offset is past + * the end of the bitmap. + * + */ +int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap, + u32 offset) +{ + struct netlbl_lsm_secattr_catmap *iter = catmap; + u32 node_idx; + u32 node_bit; + NETLBL_CATMAP_MAPTYPE bitmask; + NETLBL_CATMAP_MAPTYPE bitmap; + + if (offset > iter->startbit) { + while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) { + iter = iter->next; + if (iter == NULL) + return -ENOENT; + } + node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE; + node_bit = offset - iter->startbit - + (NETLBL_CATMAP_MAPSIZE * node_idx); + } else { + node_idx = 0; + node_bit = 0; + } + bitmask = NETLBL_CATMAP_BIT << node_bit; + + for (;;) { + bitmap = iter->bitmap[node_idx]; + while (bitmask != 0 && (bitmap & bitmask) != 0) { + bitmask <<= 1; + node_bit++; + } + + if (bitmask != 0) + return iter->startbit + + (NETLBL_CATMAP_MAPSIZE * node_idx) + + node_bit - 1; + else if (++node_idx >= NETLBL_CATMAP_MAPCNT) { + if (iter->next == NULL) + return iter->startbit + NETLBL_CATMAP_SIZE - 1; + iter = iter->next; + node_idx = 0; + } + bitmask = NETLBL_CATMAP_BIT; + node_bit = 0; + } + + return -ENOENT; +} + +/** + * netlbl_secattr_catmap_setbit - Set a bit in a LSM secattr catmap + * @catmap: the category bitmap + * @bit: the bit to set + * @flags: memory allocation flags + * + * Description: + * Set the bit specified by @bit in @catmap. Returns zero on success, + * negative values on failure. + * + */ +int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, + u32 bit, + gfp_t flags) +{ + struct netlbl_lsm_secattr_catmap *iter = catmap; + u32 node_bit; + u32 node_idx; + + while (iter->next != NULL && + bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) + iter = iter->next; + if (bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) { + iter->next = netlbl_secattr_catmap_alloc(flags); + if (iter->next == NULL) + return -ENOMEM; + iter = iter->next; + iter->startbit = bit & ~(NETLBL_CATMAP_SIZE - 1); + } + + /* gcc always rounds to zero when doing integer division */ + node_idx = (bit - iter->startbit) / NETLBL_CATMAP_MAPSIZE; + node_bit = bit - iter->startbit - (NETLBL_CATMAP_MAPSIZE * node_idx); + iter->bitmap[node_idx] |= NETLBL_CATMAP_BIT << node_bit; + + return 0; +} + +/** + * netlbl_secattr_catmap_setrng - Set a range of bits in a LSM secattr catmap + * @catmap: the category bitmap + * @start: the starting bit + * @end: the last bit in the string + * @flags: memory allocation flags + * + * Description: + * Set a range of bits, starting at @start and ending with @end. Returns zero + * on success, negative values on failure. + * + */ +int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, + u32 start, + u32 end, + gfp_t flags) +{ + int ret_val = 0; + struct netlbl_lsm_secattr_catmap *iter = catmap; + u32 iter_max_spot; + u32 spot; + + /* XXX - This could probably be made a bit faster by combining writes + * to the catmap instead of setting a single bit each time, but for + * right now skipping to the start of the range in the catmap should + * be a nice improvement over calling the individual setbit function + * repeatedly from a loop. */ + + while (iter->next != NULL && + start >= (iter->startbit + NETLBL_CATMAP_SIZE)) + iter = iter->next; + iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; + + for (spot = start; spot <= end && ret_val == 0; spot++) { + if (spot >= iter_max_spot && iter->next != NULL) { + iter = iter->next; + iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; + } + ret_val = netlbl_secattr_catmap_setbit(iter, spot, GFP_ATOMIC); + } + + return ret_val; +} + +/* * LSM Functions */ @@ -62,6 +263,9 @@ int netlbl_socket_setattr(const struct socket *sock, int ret_val = -ENOENT; struct netlbl_dom_map *dom_entry; + if ((secattr->flags & NETLBL_SECATTR_DOMAIN) == 0) + return -ENOENT; + rcu_read_lock(); dom_entry = netlbl_domhsh_getentry(secattr->domain); if (dom_entry == NULL) @@ -146,10 +350,8 @@ int netlbl_socket_getattr(const struct socket *sock, int netlbl_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr) { - int ret_val; - - ret_val = cipso_v4_skbuff_getattr(skb, secattr); - if (ret_val == 0) + if (CIPSO_V4_OPTEXIST(skb) && + cipso_v4_skbuff_getattr(skb, secattr) == 0) return 0; return netlbl_unlabel_getattr(secattr); @@ -200,7 +402,7 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { - if (secattr->cache == NULL) + if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0) return -ENOMSG; if (CIPSO_V4_OPTEXIST(skb)) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 53c9079ad2c..e8c80f33f3d 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -188,12 +188,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = netlbl_netlink_hdr_put(cb_arg->skb, - NETLINK_CB(cb_arg->nl_cb->skb).pid, - cb_arg->seq, - netlbl_mgmt_gnl_family.id, - NLM_F_MULTI, - NLBL_MGMT_C_LISTALL); + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, &netlbl_mgmt_gnl_family, + NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) goto listall_cb_failure; @@ -356,15 +353,11 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) void *data; struct netlbl_dom_map *entry; - ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) return -ENOMEM; - data = netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - info->snd_seq, - netlbl_mgmt_gnl_family.id, - 0, - NLBL_MGMT_C_LISTDEF); + data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family, + 0, NLBL_MGMT_C_LISTDEF); if (data == NULL) goto listdef_failure; @@ -390,7 +383,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) genlmsg_end(ans_skb, data); - ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + ret_val = genlmsg_reply(ans_skb, info); if (ret_val != 0) goto listdef_failure; return 0; @@ -422,12 +415,9 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = netlbl_netlink_hdr_put(skb, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - netlbl_mgmt_gnl_family.id, - NLM_F_MULTI, - NLBL_MGMT_C_PROTOCOLS); + data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &netlbl_mgmt_gnl_family, NLM_F_MULTI, + NLBL_MGMT_C_PROTOCOLS); if (data == NULL) goto protocols_cb_failure; @@ -492,15 +482,11 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) struct sk_buff *ans_skb = NULL; void *data; - ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) return -ENOMEM; - data = netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - info->snd_seq, - netlbl_mgmt_gnl_family.id, - 0, - NLBL_MGMT_C_VERSION); + data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family, + 0, NLBL_MGMT_C_VERSION); if (data == NULL) goto version_failure; @@ -512,7 +498,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) genlmsg_end(ans_skb, data); - ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + ret_val = genlmsg_reply(ans_skb, info); if (ret_val != 0) goto version_failure; return 0; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 1833ad233b3..5bc37181662 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -35,6 +35,7 @@ #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> +#include <linux/audit.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -47,7 +48,8 @@ #include "netlabel_unlabeled.h" /* Accept unlabeled packets flag */ -static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(netlabel_unlabel_acceptflg_lock); +static u8 netlabel_unlabel_acceptflg = 0; /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_unlabel_gnl_family = { @@ -82,13 +84,20 @@ static void netlbl_unlabel_acceptflg_set(u8 value, struct audit_buffer *audit_buf; u8 old_val; - old_val = atomic_read(&netlabel_unlabel_accept_flg); - atomic_set(&netlabel_unlabel_accept_flg, value); + rcu_read_lock(); + old_val = netlabel_unlabel_acceptflg; + spin_lock(&netlabel_unlabel_acceptflg_lock); + netlabel_unlabel_acceptflg = value; + spin_unlock(&netlabel_unlabel_acceptflg_lock); + rcu_read_unlock(); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); - audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); - audit_log_end(audit_buf); + if (audit_buf != NULL) { + audit_log_format(audit_buf, + " unlbl_accept=%u old=%u", value, old_val); + audit_log_end(audit_buf); + } } /* @@ -138,29 +147,27 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) struct sk_buff *ans_skb; void *data; - ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; - data = netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - info->snd_seq, - netlbl_unlabel_gnl_family.id, - 0, - NLBL_UNLABEL_C_LIST); + data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family, + 0, NLBL_UNLABEL_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; } + rcu_read_lock(); ret_val = nla_put_u8(ans_skb, NLBL_UNLABEL_A_ACPTFLG, - atomic_read(&netlabel_unlabel_accept_flg)); + netlabel_unlabel_acceptflg); + rcu_read_unlock(); if (ret_val != 0) goto list_failure; genlmsg_end(ans_skb, data); - ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + ret_val = genlmsg_reply(ans_skb, info); if (ret_val != 0) goto list_failure; return 0; @@ -240,10 +247,17 @@ int netlbl_unlabel_genl_init(void) */ int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) { - if (atomic_read(&netlabel_unlabel_accept_flg) == 1) - return netlbl_secattr_init(secattr); + int ret_val; - return -ENOMSG; + rcu_read_lock(); + if (netlabel_unlabel_acceptflg == 1) { + netlbl_secattr_init(secattr); + ret_val = 0; + } else + ret_val = -ENOMSG; + rcu_read_unlock(); + + return ret_val; } /** diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 98a416381e6..42f12bd6596 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -46,6 +46,10 @@ #include "netlabel_cipso_v4.h" #include "netlabel_user.h" +/* do not do any auditing if audit_enabled == 0, see kernel/audit.c for + * details */ +extern int audit_enabled; + /* * NetLabel NETLINK Setup Functions */ @@ -101,6 +105,9 @@ struct audit_buffer *netlbl_audit_start_common(int type, char *secctx; u32 secctx_len; + if (audit_enabled == 0) + return NULL; + audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type); if (audit_buf == NULL) return NULL; diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 47967ef3296..6d7f4ab46c2 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -42,37 +42,6 @@ /* NetLabel NETLINK helper functions */ /** - * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. Returns a pointer to the start of the payload buffer - * on success or NULL on failure. - * - */ -static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - int flags, - u8 cmd) -{ - return genlmsg_put(skb, - pid, - seq, - type, - 0, - flags, - cmd, - NETLBL_PROTO_VERSION); -} - -/** * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg * @skb: the packet * @audit_info: NetLabel audit information diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d56e0d21f91..276131fe56d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -699,7 +699,7 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) struct sock *netlink_getsockbyfilp(struct file *filp) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct sock *sock; if (!S_ISSOCK(inode->i_mode)) @@ -1075,8 +1075,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; len = sizeof(int); val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; - put_user(len, optlen); - put_user(val, optval); + if (put_user(len, optlen) || + put_user(val, optval)) + return -EFAULT; err = 0; break; default: @@ -1147,12 +1148,11 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = nlmsg_new(len, GFP_KERNEL); + skb = alloc_skb(len, GFP_KERNEL); if (skb==NULL) goto out; NETLINK_CB(skb).pid = nlk->pid; - NETLINK_CB(skb).dst_pid = dst_pid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); selinux_get_task_sid(current, &(NETLINK_CB(skb).sid)); @@ -1434,14 +1434,13 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) struct sk_buff *skb; struct nlmsghdr *rep; struct nlmsgerr *errmsg; - int size; + size_t payload = sizeof(*errmsg); - if (err == 0) - size = nlmsg_total_size(sizeof(*errmsg)); - else - size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh)); + /* error messages get the original request appened */ + if (err) + payload += nlmsg_len(nlh); - skb = nlmsg_new(size, GFP_KERNEL); + skb = nlmsg_new(payload, GFP_KERNEL); if (!skb) { struct sock *sk; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 49bc2db7982..548e4e6e698 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -143,6 +143,13 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) goto errout; } + if (ops->dumpit) + ops->flags |= GENL_CMD_CAP_DUMP; + if (ops->doit) + ops->flags |= GENL_CMD_CAP_DO; + if (ops->policy) + ops->flags |= GENL_CMD_CAP_HASPOL; + genl_lock(); list_add_tail(&ops->ops_list, &family->ops_list); genl_unlock(); @@ -331,7 +338,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } *errp = err = netlink_dump_start(genl_sock, skb, nlh, - ops->dumpit, NULL); + ops->dumpit, ops->done); if (err == 0) skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len)); @@ -384,16 +391,19 @@ static void genl_rcv(struct sock *sk, int len) * Controller **************************************************************************/ +static struct genl_family genl_ctrl = { + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, +}; + static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { - struct nlattr *nla_ops; - struct genl_ops *ops; void *hdr; - int idx = 1; - hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, - family->version); + hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -403,34 +413,31 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); - nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); - if (nla_ops == NULL) - goto nla_put_failure; - - list_for_each_entry(ops, &family->ops_list, ops_list) { - struct nlattr *nest; + if (!list_empty(&family->ops_list)) { + struct nlattr *nla_ops; + struct genl_ops *ops; + int idx = 1; - nest = nla_nest_start(skb, idx++); - if (nest == NULL) + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) goto nla_put_failure; - NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); - NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; - if (ops->policy) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; - if (ops->doit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); - if (ops->dumpit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); + nla_nest_end(skb, nest); + } - nla_nest_end(skb, nest); + nla_nest_end(skb, nla_ops); } - nla_nest_end(skb, nla_ops); - return genlmsg_end(skb, hdr); nla_put_failure: @@ -480,7 +487,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); @@ -529,7 +536,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) goto errout; } - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); errout: return err; } @@ -562,13 +569,6 @@ static struct genl_ops genl_ctrl_ops = { .policy = ctrl_policy, }; -static struct genl_family genl_ctrl = { - .id = GENL_ID_CTRL, - .name = "nlctrl", - .version = 0x1, - .maxattr = CTRL_ATTR_MAX, -}; - static int __init genl_init(void) { int i, err; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index c11737f472d..0096105bcd4 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -155,14 +155,15 @@ static int nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax2 atomic_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { - if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) { + nr_neigh->digipeat = kmemdup(ax25_digi, + sizeof(*ax25_digi), + GFP_KERNEL); + if (nr_neigh->digipeat == NULL) { kfree(nr_neigh); if (nr_node) nr_node_put(nr_node); return -ENOMEM; } - memcpy(nr_neigh->digipeat, ax25_digi, - sizeof(*ax25_digi)); } spin_lock_bh(&nr_neigh_list_lock); @@ -432,11 +433,12 @@ static int nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net atomic_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { - if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) { + nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi), + GFP_KERNEL); + if (nr_neigh->digipeat == NULL) { kfree(nr_neigh); return -ENOMEM; } - memcpy(nr_neigh->digipeat, ax25_digi, sizeof(*ax25_digi)); } spin_lock_bh(&nr_neigh_list_lock); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f4ccb90e673..da73e8a8c18 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -71,6 +71,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> #include <asm/page.h> +#include <asm/cacheflush.h> #include <asm/io.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -201,7 +202,7 @@ struct packet_sock { spinlock_t bind_lock; char running; /* prot_hook is attached*/ int ifindex; /* bound device */ - unsigned short num; + __be16 num; #ifdef CONFIG_PACKET_MULTICAST struct packet_mclist *mclist; #endif @@ -331,7 +332,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; struct sk_buff *skb; struct net_device *dev; - unsigned short proto=0; + __be16 proto=0; int err; /* @@ -659,7 +660,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sll->sll_ifindex = dev->ifindex; h->tp_status = status; - mb(); + smp_mb(); { struct page *p_start, *p_end; @@ -704,7 +705,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name; struct sk_buff *skb; struct net_device *dev; - unsigned short proto; + __be16 proto; unsigned char *addr; int ifindex, err, reserve = 0; @@ -858,7 +859,7 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol) +static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) { struct packet_sock *po = pkt_sk(sk); /* @@ -983,6 +984,7 @@ static int packet_create(struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; + __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; if (!capable(CAP_NET_RAW)) @@ -1010,7 +1012,7 @@ static int packet_create(struct socket *sock, int protocol) po = pkt_sk(sk); sk->sk_family = PF_PACKET; - po->num = protocol; + po->num = proto; sk->sk_destruct = packet_sock_destruct; atomic_inc(&packet_socks_nr); @@ -1027,8 +1029,8 @@ static int packet_create(struct socket *sock, int protocol) #endif po->prot_hook.af_packet_priv = sk; - if (protocol) { - po->prot_hook.type = protocol; + if (proto) { + po->prot_hook.type = proto; dev_add_pack(&po->prot_hook); sock_hold(sk); po->running = 1; @@ -1624,7 +1626,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing { char **pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); - int was_running, num, order = 0; + int was_running, order = 0; + __be16 num; int err = 0; if (req->tp_block_nr) { diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index a22542fa1bc..7252344779a 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -396,7 +396,7 @@ int rose_add_loopback_neigh(void) int rose_add_loopback_node(rose_address *address) { struct rose_node *rose_node; - unsigned int err = 0; + int err = 0; spin_lock_bh(&rose_node_list_lock); @@ -432,7 +432,7 @@ int rose_add_loopback_node(rose_address *address) out: spin_unlock_bh(&rose_node_list_lock); - return 0; + return err; } /* diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c index dada34a77b2..49effd92144 100644 --- a/net/rxrpc/krxiod.c +++ b/net/rxrpc/krxiod.c @@ -13,6 +13,7 @@ #include <linux/completion.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/freezer.h> #include <rxrpc/krxiod.h> #include <rxrpc/transport.h> #include <rxrpc/peer.h> diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index cea4eb5e249..3ab0f77409f 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -27,6 +27,7 @@ #include <rxrpc/call.h> #include <linux/udp.h> #include <linux/ip.h> +#include <linux/freezer.h> #include <net/sock.h> #include "internal.h" diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c index 3e7466900bd..9a9b6132dba 100644 --- a/net/rxrpc/krxtimod.c +++ b/net/rxrpc/krxtimod.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/completion.h> +#include <linux/freezer.h> #include <rxrpc/rxrpc.h> #include <rxrpc/krxtimod.h> #include <asm/errno.h> diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 94b2e2fe6fd..4268b38d92d 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c @@ -31,7 +31,6 @@ #endif #include <linux/errqueue.h> #include <asm/uaccess.h> -#include <asm/checksum.h> #include "internal.h" struct errormsg { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8298ea9ffe1..f4544dd8647 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -6,6 +6,7 @@ menu "QoS and/or fair queueing" config NET_SCHED bool "QoS and/or fair queueing" + select NET_SCH_FIFO ---help--- When the kernel has several packets to send out over a network device, it has to decide which ones to send first, which ones to @@ -40,6 +41,9 @@ config NET_SCHED The available schedulers are listed in the following questions; you can say Y to as many as you like. If unsure, say N now. +config NET_SCH_FIFO + bool + if NET_SCHED choice @@ -320,7 +324,7 @@ config CLS_U32_PERF config CLS_U32_MARK bool "Netfilter marks support" - depends on NET_CLS_U32 && NETFILTER + depends on NET_CLS_U32 ---help--- Say Y here to be able to use netfilter marks as u32 key. diff --git a/net/sched/Makefile b/net/sched/Makefile index 0f06aec6609..ff2d6e5e282 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -4,7 +4,7 @@ obj-y := sch_generic.o -obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o +obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 6cff56696a8..85de7efd5fe 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -48,14 +48,14 @@ static struct tcf_hashinfo gact_hash_info = { #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { - if (net_random() % gact->tcfg_pval) + if (!gact->tcfg_pval || net_random() % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } static int gact_determ(struct tcf_gact *gact) { - if (gact->tcf_bstats.packets % gact->tcfg_pval) + if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d8c9310da6e..a9608064a4c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -156,10 +156,9 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); - t = kmalloc(td->u.target_size, GFP_KERNEL); + t = kmemdup(td, td->u.target_size, GFP_KERNEL); if (unlikely(!t)) goto err2; - memcpy(t, td, td->u.target_size); if ((err = ipt_init_target(t, tname, hook)) < 0) goto err3; @@ -256,13 +255,12 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ** for foolproof you need to not assume this */ - t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); + t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); if (unlikely(!t)) goto rtattr_failure; c.bindcnt = ipt->tcf_bindcnt - bind; c.refcnt = ipt->tcf_refcnt - ref; - memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index fed47b65883..af68e1e8325 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -46,6 +46,18 @@ static struct tcf_hashinfo police_hash_info = { .lock = &police_lock, }; +/* old policer structure from before tc actions */ +struct tc_police_compat +{ + u32 index; + int action; + u32 limit; + u32 burst; + u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; +}; + /* Each policer is serialized by its individual spinlock */ #ifdef CONFIG_NET_CLS_ACT @@ -131,12 +143,15 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; + int size; if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) return -EINVAL; - if (tb[TCA_POLICE_TBF-1] == NULL || - RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) + if (tb[TCA_POLICE_TBF-1] == NULL) + return -EINVAL; + size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); + if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) return -EINVAL; parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); @@ -415,12 +430,15 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) struct tcf_police *police; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; + int size; if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) return NULL; - if (tb[TCA_POLICE_TBF-1] == NULL || - RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) + if (tb[TCA_POLICE_TBF-1] == NULL) + return NULL; + size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); + if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) return NULL; parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 901571a6770..5fe80854ca9 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -71,11 +71,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind) static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) { - d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); + d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; d->tcfd_datalen = datalen; - memcpy(d->tcfd_defdata, defdata, datalen); return 0; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 37a18402164..edb8fc97ae1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -217,7 +217,7 @@ replay: /* Create new proto tcf */ err = -ENOBUFS; - if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL) + if ((tp = kzalloc(sizeof(*tp), GFP_KERNEL)) == NULL) goto errout; err = -EINVAL; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); @@ -247,7 +247,6 @@ replay: kfree(tp); goto errout; } - memset(tp, 0, sizeof(*tp)); tp->ops = tp_ops; tp->protocol = protocol; tp->prio = nprio ? : tcf_auto_prio(*back); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e54acc6bccc..c797d6ada7d 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -101,13 +101,10 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_head *head = (struct fw_head*)tp->root; struct fw_filter *f; int r; -#ifdef CONFIG_NETFILTER - u32 id = skb->nfmark & head->mask; -#else - u32 id = 0; -#endif + u32 id = skb->mark; if (head != NULL) { + id &= head->mask; for (f=head->ht[fw_hash(id)]; f; f=f->next) { if (f->id == id) { *res = f->res; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 6e230ecfba0..587b9adab38 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -77,7 +77,7 @@ struct rsvp_head struct rsvp_session { struct rsvp_session *next; - u32 dst[RSVP_DST_LEN]; + __be32 dst[RSVP_DST_LEN]; struct tc_rsvp_gpi dpi; u8 protocol; u8 tunnelid; @@ -89,7 +89,7 @@ struct rsvp_session struct rsvp_filter { struct rsvp_filter *next; - u32 src[RSVP_DST_LEN]; + __be32 src[RSVP_DST_LEN]; struct tc_rsvp_gpi spi; u8 tunnelhdr; @@ -100,17 +100,17 @@ struct rsvp_filter struct rsvp_session *sess; }; -static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid) +static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) { - unsigned h = dst[RSVP_DST_LEN-1]; + unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; h ^= h>>16; h ^= h>>8; return (h ^ protocol ^ tunnelid) & 0xFF; } -static __inline__ unsigned hash_src(u32 *src) +static __inline__ unsigned hash_src(__be32 *src) { - unsigned h = src[RSVP_DST_LEN-1]; + unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; h ^= h>>16; h ^= h>>8; h ^= h>>4; @@ -138,7 +138,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, struct rsvp_session *s; struct rsvp_filter *f; unsigned h1, h2; - u32 *dst, *src; + __be32 *dst, *src; u8 protocol; u8 tunnelid = 0; u8 *xprt; @@ -410,7 +410,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct rtattr *tb[TCA_RSVP_MAX]; struct tcf_exts e; unsigned h1, h2; - u32 *dst; + __be32 *dst; int err; if (opt == NULL) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 0a6cfa0005b..8b519480199 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -143,7 +143,7 @@ next_knode: #endif #ifdef CONFIG_CLS_U32_MARK - if ((skb->nfmark & n->mark.mask) != n->mark.val) { + if ((skb->mark & n->mark.mask) != n->mark.val) { n = n->next; goto next_knode; } else { diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 61e3b740ab1..45d47d37155 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -208,13 +208,9 @@ META_COLLECTOR(int_maclen) * Netfilter **************************************************************************/ -META_COLLECTOR(int_nfmark) +META_COLLECTOR(int_mark) { -#ifdef CONFIG_NETFILTER - dst->value = skb->nfmark; -#else - dst->value = 0; -#endif + dst->value = skb->mark; } /************************************************************************** @@ -490,7 +486,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(PKTLEN)] = META_FUNC(int_pktlen), [META_ID(DATALEN)] = META_FUNC(int_datalen), [META_ID(MACLEN)] = META_FUNC(int_maclen), - [META_ID(NFMARK)] = META_FUNC(int_nfmark), + [META_ID(NFMARK)] = META_FUNC(int_mark), [META_ID(TCINDEX)] = META_FUNC(int_tcindex), [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), [META_ID(RTIIF)] = META_FUNC(int_rtiif), @@ -550,10 +546,9 @@ static int meta_var_change(struct meta_value *dst, struct rtattr *rta) { int len = RTA_PAYLOAD(rta); - dst->val = (unsigned long) kmalloc(len, GFP_KERNEL); + dst->val = (unsigned long)kmemdup(RTA_DATA(rta), len, GFP_KERNEL); if (dst->val == 0UL) return -ENOMEM; - memcpy((void *) dst->val, RTA_DATA(rta), len); dst->len = len; return 0; } diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index cc80babfd79..005db409be6 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -34,12 +34,10 @@ static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len, return -EINVAL; em->datalen = sizeof(*nbyte) + nbyte->len; - em->data = (unsigned long) kmalloc(em->datalen, GFP_KERNEL); + em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL); if (em->data == 0UL) return -ENOBUFS; - memcpy((void *) em->data, data, em->datalen); - return 0; } diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 0fd0768a17c..8f8a16da72a 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -251,12 +251,11 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; em->data = *(u32 *) data; } else { - void *v = kmalloc(data_len, GFP_KERNEL); + void *v = kmemdup(data, data_len, GFP_KERNEL); if (v == NULL) { err = -ENOBUFS; goto errout; } - memcpy(v, data, data_len); em->data = (unsigned long) v; } } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0b648929114..65825f4409d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -191,21 +191,27 @@ int unregister_qdisc(struct Qdisc_ops *qops) (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) { - read_unlock(&qdisc_tree_lock); + if (q->handle == handle) return q; - } } - read_unlock(&qdisc_tree_lock); return NULL; } +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + struct Qdisc *q; + + read_lock(&qdisc_tree_lock); + q = __qdisc_lookup(dev, handle); + read_unlock(&qdisc_tree_lock); + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -348,6 +354,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) return oqdisc; } +void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +{ + struct Qdisc_class_ops *cops; + unsigned long cl; + u32 parentid; + + if (n == 0) + return; + while ((parentid = sch->parent)) { + sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + cops = sch->ops->cl_ops; + if (cops->qlen_notify) { + cl = cops->get(sch, parentid); + cops->qlen_notify(sch, cl); + cops->put(sch, cl); + } + sch->q.qlen -= n; + } +} +EXPORT_SYMBOL(qdisc_tree_decrease_qlen); /* Graft qdisc "new" to class "classid" of qdisc "parent" or to device "dev". @@ -1112,7 +1138,7 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { int err = 0; - u32 protocol = skb->protocol; + __be16 protocol = skb->protocol; #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *otp = tp; reclassify: @@ -1277,7 +1303,6 @@ static int __init pktsched_init(void) subsys_initcall(pktsched_init); -EXPORT_SYMBOL(qdisc_lookup); EXPORT_SYMBOL(qdisc_get_rtab); EXPORT_SYMBOL(qdisc_put_rtab); EXPORT_SYMBOL(register_qdisc); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index dbf44da0912..edc7bb0b9c8 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -316,7 +316,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, } memset(flow,0,sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) flow->q = &noop_qdisc; DPRINTK("atm_tc_change: qdisc %p\n",flow->q); flow->sock = sock; @@ -576,7 +576,8 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); p->link.filter_list = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index bac881bfe36..f79a4f3d0a9 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -371,8 +371,6 @@ static void cbq_deactivate_class(struct cbq_class *this) return; } } - - cl = cl_prev->next_alive; return; } } while ((cl_prev = cl) != q->active[prio]); @@ -1258,6 +1256,8 @@ static unsigned int cbq_drop(struct Qdisc* sch) do { if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) { sch->q.qlen--; + if (!cl->q->q.qlen) + cbq_deactivate_class(cl); return len; } } while ((cl = cl->next_alive) != cl_head); @@ -1429,7 +1429,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.sibling = &q->link; q->link.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) q->link.q = &noop_qdisc; q->link.priority = TC_CBQ_MAXPRIO-1; @@ -1674,7 +1675,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL) + if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_POLICE @@ -1683,9 +1685,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = cl->q; - cl->q = new; - sch->q.qlen -= (*old)->q.qlen; + *old = xchg(&cl->q, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1702,6 +1703,14 @@ cbq_leaf(struct Qdisc *sch, unsigned long arg) return cl ? cl->q : NULL; } +static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct cbq_class *cl = (struct cbq_class *)arg; + + if (cl->q->q.qlen == 0) + cbq_deactivate_class(cl); +} + static unsigned long cbq_get(struct Qdisc *sch, u32 classid) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -1932,7 +1941,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; cl->classid = classid; cl->tparent = parent; @@ -1986,12 +1995,17 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class*)arg; + unsigned int qlen; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); + qlen = cl->q->q.qlen; + qdisc_reset(cl->q); + qdisc_tree_decrease_qlen(cl->q, qlen); + if (cl->next_alive) cbq_deactivate_class(cl); @@ -2082,6 +2096,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops cbq_class_ops = { .graft = cbq_graft, .leaf = cbq_leaf, + .qlen_notify = cbq_qlen_notify, .get = cbq_get, .put = cbq_put, .change = cbq_change_class, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 11c8a2119b9..d5421816f00 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -88,15 +88,16 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (new == NULL) new = &noop_qdisc; } sch_tree_lock(sch); *old = xchg(&p->q, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; @@ -307,7 +308,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) if (p->mask[index] != 0xff || p->value[index]) printk(KERN_WARNING "dsmark_dequeue: " "unsupported protocol %d\n", - htons(skb->protocol)); + ntohs(skb->protocol)); break; }; @@ -387,7 +388,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) p->default_index = default_index; p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 88c6a99ce53..bc116bd6937 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -450,13 +450,15 @@ errout: return ERR_PTR(-err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, + unsigned int parentid) { struct Qdisc *sch; sch = qdisc_alloc(dev, ops); if (IS_ERR(sch)) goto errout; + sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) return sch; @@ -520,7 +522,8 @@ void dev_activate(struct net_device *dev) if (dev->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); + qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, + TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; @@ -606,13 +609,10 @@ void dev_shutdown(struct net_device *dev) qdisc_unlock_tree(dev); } -EXPORT_SYMBOL(__netdev_watchdog_up); EXPORT_SYMBOL(netif_carrier_on); EXPORT_SYMBOL(netif_carrier_off); EXPORT_SYMBOL(noop_qdisc); -EXPORT_SYMBOL(noop_qdisc_ops); EXPORT_SYMBOL(qdisc_create_dflt); -EXPORT_SYMBOL(qdisc_alloc); EXPORT_SYMBOL(qdisc_destroy); EXPORT_SYMBOL(qdisc_reset); EXPORT_SYMBOL(qdisc_lock_tree); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6a6735a2ed3..6eefa699577 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -946,6 +946,7 @@ qdisc_peek_len(struct Qdisc *sch) if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { if (net_ratelimit()) printk("qdisc_peek_len: failed to requeue\n"); + qdisc_tree_decrease_qlen(sch, 1); return 0; } return len; @@ -957,11 +958,7 @@ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) unsigned int len = cl->qdisc->q.qlen; qdisc_reset(cl->qdisc); - if (len > 0) { - update_vf(cl, 0, 0); - set_passive(cl); - sch->q.qlen -= len; - } + qdisc_tree_decrease_qlen(cl->qdisc, len); } static void @@ -1138,7 +1135,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; cl->stats_lock = &sch->dev->queue_lock; @@ -1271,7 +1268,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid); if (new == NULL) new = &noop_qdisc; } @@ -1294,6 +1292,17 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } +static void +hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl->qdisc->q.qlen == 0) { + update_vf(cl, 0, 0); + set_passive(cl); + } +} + static unsigned long hfsc_get_class(struct Qdisc *sch, u32 classid) { @@ -1514,7 +1523,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) q->root.refcnt = 1; q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; q->root.stats_lock = &sch->dev->queue_lock; @@ -1777,6 +1787,7 @@ static struct Qdisc_class_ops hfsc_class_ops = { .delete = hfsc_delete_class, .graft = hfsc_graft_class, .leaf = hfsc_class_leaf, + .qlen_notify = hfsc_qlen_notify, .get = hfsc_get_class, .put = hfsc_put_class, .bind_tcf = hfsc_bind_tcf, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9b9c555c713..15f23c5511a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -147,6 +147,10 @@ struct htb_class { psched_tdiff_t mbuffer; /* max wait time */ long tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ + + int prio; /* For parent to leaf return possible here */ + int quantum; /* we do backup. Finally full replacement */ + /* of un.leaf originals should be done. */ }; /* TODO: maybe compute rate when size is too large .. or drop ? */ @@ -1223,17 +1227,14 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct htb_class *cl = (struct htb_class *)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) + if (new == NULL && + (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { - if (cl->prio_activity) - htb_deactivate(qdisc_priv(sch), cl); - - /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1248,6 +1249,14 @@ static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) return (cl && !cl->level) ? cl->un.leaf.q : NULL; } +static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct htb_class *cl = (struct htb_class *)arg; + + if (cl->un.leaf.q->q.qlen == 0) + htb_deactivate(qdisc_priv(sch), cl); +} + static unsigned long htb_get(struct Qdisc *sch, u32 classid) { struct htb_class *cl = htb_find(classid, sch); @@ -1266,12 +1275,44 @@ static void htb_destroy_filters(struct tcf_proto **fl) } } +static inline int htb_parent_last_child(struct htb_class *cl) +{ + if (!cl->parent) + /* the root class */ + return 0; + + if (!(cl->parent->children.next == &cl->sibling && + cl->parent->children.prev == &cl->sibling)) + /* not the last child */ + return 0; + + return 1; +} + +static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) +{ + struct htb_class *parent = cl->parent; + + BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + + parent->level = 0; + memset(&parent->un.inner, 0, sizeof(parent->un.inner)); + INIT_LIST_HEAD(&parent->un.leaf.drop_list); + parent->un.leaf.q = new_q ? new_q : &noop_qdisc; + parent->un.leaf.quantum = parent->quantum; + parent->un.leaf.prio = parent->prio; + parent->tokens = parent->buffer; + parent->ctokens = parent->cbuffer; + PSCHED_GET_TIME(parent->t_c); + parent->cmode = HTB_CAN_SEND; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); + if (!cl->level) { BUG_TRAP(cl->un.leaf.q); - sch->q.qlen -= cl->un.leaf.q->q.qlen; qdisc_destroy(cl->un.leaf.q); } qdisc_put_rtab(cl->rate); @@ -1284,8 +1325,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) struct htb_class, sibling)); /* note: this delete may happen twice (see htb_delete) */ - if (!hlist_unhashed(&cl->hlist)) - hlist_del(&cl->hlist); + hlist_del_init(&cl->hlist); list_del(&cl->sibling); if (cl->prio_activity) @@ -1323,6 +1363,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; + unsigned int qlen; + struct Qdisc *new_q = NULL; + int last_child = 0; // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class @@ -1330,15 +1373,29 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (!list_empty(&cl->children) || cl->filter_cnt) return -EBUSY; + if (!cl->level && htb_parent_last_child(cl)) { + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->parent->classid); + last_child = 1; + } + sch_tree_lock(sch); /* delete from hash and active; remainder in destroy_class */ - if (!hlist_unhashed(&cl->hlist)) - hlist_del(&cl->hlist); + hlist_del_init(&cl->hlist); + + if (!cl->level) { + qlen = cl->un.leaf.q->q.qlen; + qdisc_reset(cl->un.leaf.q); + qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + } if (cl->prio_activity) htb_deactivate(q, cl); + if (last_child) + htb_parent_to_leaf(cl, new_q); + if (--cl->refcnt == 0) htb_destroy_class(sch, cl); @@ -1412,11 +1469,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { + unsigned int qlen = parent->un.leaf.q->q.qlen; + /* turn parent into inner node */ - sch->q.qlen -= parent->un.leaf.q->q.qlen; + qdisc_reset(parent->un.leaf.q); + qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); qdisc_destroy(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); @@ -1470,6 +1530,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->un.leaf.quantum = hopt->quantum; if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO) cl->un.leaf.prio = TC_HTB_NUMPRIO - 1; + + /* backup for htb_parent_to_leaf */ + cl->quantum = cl->un.leaf.quantum; + cl->prio = cl->un.leaf.prio; } cl->buffer = hopt->buffer; @@ -1564,6 +1628,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops htb_class_ops = { .graft = htb_graft, .leaf = htb_leaf, + .qlen_notify = htb_qlen_notify, .get = htb_get, .put = htb_put, .change = htb_change_class, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ef8874babf6..79542af9dab 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -4,7 +4,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * 2 of the License. * * Many of the algorithms and ideas for this came from * NIST Net which is not copyrighted. @@ -287,13 +287,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { + qdisc_tree_decrease_qlen(q->qdisc, 1); sch->qstats.drops++; - - /* After this qlen is confused */ printk(KERN_ERR "netem: queue discpline %s could not requeue\n", q->qdisc->ops->id); - - sch->q.qlen--; } mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); @@ -574,7 +571,8 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); + q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); return -ENOMEM; @@ -661,8 +659,8 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a5fa03c0c19..2567b4c96c1 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -222,21 +222,27 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt) for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); + } } sch_tree_unlock(sch); for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); child = xchg(&q->queues[i], child); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); qdisc_destroy(child); + } sch_tree_unlock(sch); } } @@ -294,7 +300,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = q->queues[band]; q->queues[band] = new; - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index d65cadddea6..acddad08850 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -175,12 +175,14 @@ static void red_destroy(struct Qdisc *sch) qdisc_destroy(q->qdisc); } -static struct Qdisc *red_create_dflt(struct net_device *dev, u32 limit) +static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) { - struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops); + struct Qdisc *q; struct rtattr *rta; int ret; + q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (q) { rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); @@ -219,7 +221,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt) ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); if (ctl->limit > 0) { - child = red_create_dflt(sch->dev, ctl->limit); + child = red_create_dflt(sch, ctl->limit); if (child == NULL) return -ENOMEM; } @@ -227,8 +229,10 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt) sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; - if (child) + if (child) { + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); qdisc_destroy(xchg(&q->qdisc, child)); + } red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, @@ -306,8 +310,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d0d6e595a78..459cda258a5 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -393,6 +393,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = RTA_DATA(opt); + unsigned int qlen; if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) return -EINVAL; @@ -403,8 +404,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + qlen = sch->q.qlen; while (sch->q.qlen >= q->limit-1) sfq_drop(sch); + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index d9a5d298d75..ed9b6d93854 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -250,7 +250,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { /* When requeue fails skb is dropped */ - sch->q.qlen--; + qdisc_tree_decrease_qlen(q->qdisc, 1); sch->qstats.drops++; } @@ -273,12 +273,14 @@ static void tbf_reset(struct Qdisc* sch) del_timer(&q->wd_timer); } -static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit) +static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) { - struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops); + struct Qdisc *q; struct rtattr *rta; int ret; + q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (q) { rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (rta) { @@ -341,13 +343,15 @@ static int tbf_change(struct Qdisc* sch, struct rtattr *opt) goto done; if (qopt->limit > 0) { - if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL) + if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) goto done; } sch_tree_lock(sch); - if (child) + if (child) { + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); qdisc_destroy(xchg(&q->qdisc, child)); + } q->limit = qopt->limit; q->mtu = qopt->mtu; q->max_size = max_size; @@ -449,8 +453,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 27329ce9c31..5db95caed0a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -61,7 +61,7 @@ #include <net/sctp/sm.h> /* Forward declarations for internal functions. */ -static void sctp_assoc_bh_rcv(struct sctp_association *asoc); +static void sctp_assoc_bh_rcv(struct work_struct *work); /* 1st Level Abstractions. */ @@ -269,9 +269,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); - sctp_inq_set_th_handler(&asoc->base.inqueue, - (void (*)(void *))sctp_assoc_bh_rcv, - asoc); + sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv); /* Create an output queue. */ sctp_outq_init(asoc, &asoc->outqueue); @@ -300,6 +298,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_flags = sp->default_flags; asoc->default_context = sp->default_context; asoc->default_timetolive = sp->default_timetolive; + asoc->default_rcv_context = sp->default_rcv_context; return asoc; @@ -346,11 +345,18 @@ void sctp_association_free(struct sctp_association *asoc) struct list_head *pos, *temp; int i; - list_del(&asoc->asocs); + /* Only real associations count against the endpoint, so + * don't bother for if this is a temporary association. + */ + if (!asoc->temp) { + list_del(&asoc->asocs); - /* Decrement the backlog value for a TCP-style listening socket. */ - if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) - sk->sk_ack_backlog--; + /* Decrement the backlog value for a TCP-style listening + * socket. + */ + if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) + sk->sk_ack_backlog--; + } /* Mark as dead, so other users can know this structure is * going away. @@ -481,7 +487,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, " port: %d\n", asoc, (&peer->ipaddr), - peer->ipaddr.v4.sin_port); + ntohs(peer->ipaddr.v4.sin_port)); /* If we are to remove the current retran_path, update it * to the next peer before removing this peer from the list. @@ -530,13 +536,13 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, sp = sctp_sk(asoc->base.sk); /* AF_INET and AF_INET6 share common port field. */ - port = addr->v4.sin_port; + port = ntohs(addr->v4.sin_port); SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", " port: %d state:%d\n", asoc, addr, - addr->v4.sin_port, + port, peer_state); /* Set the port if it has not been set yet. */ @@ -702,6 +708,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_transport *first; struct sctp_transport *second; struct sctp_ulpevent *event; + struct sockaddr_storage addr; struct list_head *pos; int spc_state = 0; @@ -724,8 +731,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the * user. */ - event = sctp_ulpevent_make_peer_addr_change(asoc, - (struct sockaddr_storage *) &transport->ipaddr, + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, spc_state, error, GFP_ATOMIC); if (event) sctp_ulpq_tail_event(&asoc->ulpq, event); @@ -861,7 +869,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, struct list_head *entry, *pos; struct sctp_transport *transport; struct sctp_chunk *chunk; - __u32 key = htonl(tsn); + __be32 key = htonl(tsn); match = NULL; @@ -919,8 +927,8 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, sctp_read_lock(&asoc->base.addr_lock); - if ((asoc->base.bind_addr.port == laddr->v4.sin_port) && - (asoc->peer.port == paddr->v4.sin_port)) { + if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && + (htons(asoc->peer.port) == paddr->v4.sin_port)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); if (!transport) goto out; @@ -937,8 +945,11 @@ out: } /* Do delayed input processing. This is scheduled by sctp_rcv(). */ -static void sctp_assoc_bh_rcv(struct sctp_association *asoc) +static void sctp_assoc_bh_rcv(struct work_struct *work) { + struct sctp_association *asoc = + container_of(work, struct sctp_association, + base.inqueue.immediate); struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sock *sk; @@ -1128,7 +1139,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) " port: %d\n", asoc, (&t->ipaddr), - t->ipaddr.v4.sin_port); + ntohs(t->ipaddr.v4.sin_port)); } /* Choose the transport for sending a INIT packet. */ @@ -1153,7 +1164,7 @@ struct sctp_transport *sctp_assoc_choose_init_transport( " port: %d\n", asoc, (&t->ipaddr), - t->ipaddr.v4.sin_port); + ntohs(t->ipaddr.v4.sin_port)); return t; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 2b9c12a170e..00994158e49 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -161,7 +161,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, * Both v4 and v6 have the port at the same offset. */ if (!addr->a.v4.sin_port) - addr->a.v4.sin_port = bp->port; + addr->a.v4.sin_port = htons(bp->port); addr->use_as_src = use_as_src; @@ -275,7 +275,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, break; } - af->from_addr_param(&addr, rawaddr, port, 0); + af->from_addr_param(&addr, rawaddr, htons(port), 0); retval = sctp_add_bind_addr(bp, &addr, 1, gfp); if (retval) { /* Can't finish building the list, clean up. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 35c49ff2d06..129756908da 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -61,7 +61,7 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep); +static void sctp_endpoint_bh_rcv(struct work_struct *work); /* * Initialize the base fields of the endpoint structure. @@ -72,6 +72,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, { memset(ep, 0, sizeof(struct sctp_endpoint)); + ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); + if (!ep->digest) + return NULL; + /* Initialize the base structure. */ /* What type of endpoint are we? */ ep->base.type = SCTP_EP_TYPE_SOCKET; @@ -85,8 +89,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, sctp_inq_init(&ep->base.inqueue); /* Set its top-half handler */ - sctp_inq_set_th_handler(&ep->base.inqueue, - (void (*)(void *))sctp_endpoint_bh_rcv, ep); + sctp_inq_set_th_handler(&ep->base.inqueue, sctp_endpoint_bh_rcv); /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); @@ -144,6 +147,13 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, { struct sock *sk = ep->base.sk; + /* If this is a temporary association, don't bother + * since we'll be removing it shortly and don't + * want anyone to find it anyway. + */ + if (asoc->temp) + return; + /* Now just add it to our list of asocs */ list_add_tail(&asoc->asocs, &ep->asocs); @@ -175,6 +185,9 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) /* Free up the HMAC transform. */ crypto_free_hash(sctp_sk(ep->base.sk)->hmac); + /* Free the digest buffer */ + kfree(ep->digest); + /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); sctp_bind_addr_free(&ep->base.bind_addr); @@ -216,7 +229,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct sctp_endpoint *retval; sctp_read_lock(&ep->base.addr_lock); - if (ep->base.bind_addr.port == laddr->v4.sin_port) { + if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) { retval = ep; @@ -244,7 +257,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_association *asoc; struct list_head *pos; - rport = paddr->v4.sin_port; + rport = ntohs(paddr->v4.sin_port); list_for_each(pos, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); @@ -304,8 +317,11 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, /* Do delayed input processing. This is scheduled by sctp_rcv(). * This may be called on BH or task time. */ -static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep) +static void sctp_endpoint_bh_rcv(struct work_struct *work) { + struct sctp_endpoint *ep = + container_of(work, struct sctp_endpoint, + base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; struct sctp_transport *transport; diff --git a/net/sctp/input.c b/net/sctp/input.c index 64f63010253..33111873a48 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -135,6 +135,9 @@ int sctp_rcv(struct sk_buff *skb) SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS); + if (skb_linearize(skb)) + goto discard_it; + sh = (struct sctphdr *) skb->h.raw; /* Pull up the IP and SCTP headers. */ @@ -723,7 +726,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l struct sctp_endpoint *ep; int hash; - hash = sctp_ep_hashfn(laddr->v4.sin_port); + hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); for (epb = head->chain; epb; epb = epb->next) { @@ -768,6 +771,9 @@ static void __sctp_hash_established(struct sctp_association *asoc) /* Add an association to the hash. Local BH-safe. */ void sctp_hash_established(struct sctp_association *asoc) { + if (asoc->temp) + return; + sctp_local_bh_disable(); __sctp_hash_established(asoc); sctp_local_bh_enable(); @@ -801,6 +807,9 @@ static void __sctp_unhash_established(struct sctp_association *asoc) /* Remove association from the hash table. Local BH-safe. */ void sctp_unhash_established(struct sctp_association *asoc) { + if (asoc->temp) + return; + sctp_local_bh_disable(); __sctp_unhash_established(asoc); sctp_local_bh_enable(); @@ -821,7 +830,7 @@ static struct sctp_association *__sctp_lookup_association( /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = sctp_assoc_hashfn(local->v4.sin_port, peer->v4.sin_port); + hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); for (epb = head->chain; epb; epb = epb->next) { @@ -948,7 +957,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, if (!af) continue; - af->from_addr_param(paddr, params.addr, ntohs(sh->source), 0); + af->from_addr_param(paddr, params.addr, sh->source, 0); asoc = __sctp_lookup_association(laddr, paddr, &transport); if (asoc) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf6deed7e84..71b07466e88 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -54,7 +54,7 @@ void sctp_inq_init(struct sctp_inq *queue) queue->in_progress = NULL; /* Create a task for delivering data. */ - INIT_WORK(&queue->immediate, NULL, NULL); + INIT_WORK(&queue->immediate, NULL); queue->malloced = 0; } @@ -97,7 +97,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * on the BH related data structures. */ list_add_tail(&chunk->list, &q->in_chunk_list); - q->immediate.func(q->immediate.data); + q->immediate.func(&q->immediate); } /* Extract a chunk from an SCTP inqueue. @@ -205,9 +205,8 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) * The intent is that this routine will pull stuff out of the * inqueue and process it. */ -void sctp_inq_set_th_handler(struct sctp_inq *q, - void (*callback)(void *), void *arg) +void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback) { - INIT_WORK(&q->immediate, callback, arg); + INIT_WORK(&q->immediate, callback); } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 78071c6e6cf..d8d36dee5ab 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,13 +78,49 @@ #include <asm/uaccess.h> +/* Event handler for inet6 address addition/deletion events. */ +int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, + void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; + + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v6.sin6_family = AF_INET6; + addr->a.v6.sin6_port = 0; + memcpy(&addr->a.v6.sin6_addr, &ifa->addr, + sizeof(struct in6_addr)); + addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } + + return NOTIFY_DONE; +} + static struct notifier_block sctp_inet6addr_notifier = { - .notifier_call = sctp_inetaddr_event, + .notifier_call = sctp_inet6addr_event, }; /* ICMP error handler. */ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { struct inet6_dev *idev; struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; @@ -170,8 +206,6 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport, fl.oif = transport->saddr.v6.sin6_scope_id; else fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet_sk(sk)->sport; - fl.fl_ip_dport = transport->ipaddr.v6.sin6_port; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -239,7 +273,7 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, int i, j; for (i = 0; i < 4 ; i++) { - __u32 a1xora2; + __be32 a1xora2; a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i]; @@ -350,7 +384,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, int is_saddr) { void *from; - __u16 *port; + __be16 *port; struct sctphdr *sh; port = &addr->v6.sin6_port; @@ -360,10 +394,10 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, sh = (struct sctphdr *) skb->h.raw; if (is_saddr) { - *port = ntohs(sh->source); + *port = sh->source; from = &skb->nh.ipv6h->saddr; } else { - *port = ntohs(sh->dest); + *port = sh->dest; from = &skb->nh.ipv6h->daddr; } ipv6_addr_copy(&addr->v6.sin6_addr, from); @@ -373,7 +407,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; - addr->v6.sin6_port = inet_sk(sk)->num; + addr->v6.sin6_port = 0; addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; } @@ -407,7 +441,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) /* Initialize a sctp_addr from an address parameter. */ static void sctp_v6_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, - __u16 port, int iif) + __be16 port, int iif) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; @@ -425,7 +459,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr, int length = sizeof(sctp_ipv6addr_param_t); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; - param->v6.param_hdr.length = ntohs(length); + param->v6.param_hdr.length = htons(length); ipv6_addr_copy(¶m->v6.addr, &addr->v6.sin6_addr); return length; @@ -433,7 +467,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr, /* Initialize a sctp_addr from a dst_entry. */ static void sctp_v6_dst_saddr(union sctp_addr *addr, struct dst_entry *dst, - unsigned short port) + __be16 port) { struct rt6_info *rt = (struct rt6_info *)dst; addr->sa.sa_family = AF_INET6; @@ -480,7 +514,7 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, } /* Initialize addr struct to INADDR_ANY. */ -static void sctp_v6_inaddr_any(union sctp_addr *addr, unsigned short port) +static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port) { memset(addr, 0x00, sizeof(union sctp_addr)); addr->v6.sin6_family = AF_INET6; @@ -855,7 +889,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) * Returns number of addresses supported. */ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, - __u16 *types) + __be16 *types) { types[0] = SCTP_PARAM_IPV4_ADDRESS; types[1] = SCTP_PARAM_IPV6_ADDRESS; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 739582415bf..fba567a7cb6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1065,7 +1065,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. */ - if (sack->num_gap_ack_blocks > 0 && + if (sack->num_gap_ack_blocks && primary->cacc.changeover_active) { list_for_each(pos, transport_list) { transport = list_entry(pos, struct sctp_transport, @@ -1632,7 +1632,7 @@ pass: } static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist, - int nskips, __u16 stream) + int nskips, __be16 stream) { int i; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 7f49e769080..b3493bdbcac 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -160,7 +160,7 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo list_for_each(pos, &epb->bind_addr.address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - addr = (union sctp_addr *)&laddr->a; + addr = &laddr->a; af = sctp_get_af_specific(addr->sa.sa_family); if (primary && af->cmp_addr(addr, primary)) { seq_printf(seq, "*"); @@ -177,10 +177,10 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa union sctp_addr *addr, *primary; struct sctp_af *af; - primary = &(assoc->peer.primary_addr); + primary = &assoc->peer.primary_addr; list_for_each(pos, &assoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); - addr = (union sctp_addr *)&transport->ipaddr; + addr = &transport->ipaddr; af = sctp_get_af_specific(addr->sa.sa_family); if (af->cmp_addr(addr, primary)) { seq_printf(seq, "*"); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fac7674438a..3a3db56729c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -79,8 +79,8 @@ static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; -kmem_cache_t *sctp_chunk_cachep __read_mostly; -kmem_cache_t *sctp_bucket_cachep __read_mostly; +struct kmem_cache *sctp_chunk_cachep __read_mostly; +struct kmem_cache *sctp_bucket_cachep __read_mostly; /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) @@ -163,7 +163,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void __sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(void) { struct net_device *dev; struct list_head *pos; @@ -179,17 +179,8 @@ static void __sctp_get_local_addr_list(void) read_unlock(&dev_base_lock); } -static void sctp_get_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Free the existing local addresses. */ -static void __sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(void) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; @@ -201,27 +192,15 @@ static void __sctp_free_local_addr_list(void) } } -/* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos; - unsigned long flags; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if @@ -242,7 +221,6 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return error; } @@ -251,7 +229,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { void *from; - __u16 *port; + __be16 *port; struct sctphdr *sh; port = &addr->v4.sin_port; @@ -259,10 +237,10 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, sh = (struct sctphdr *) skb->h.raw; if (is_saddr) { - *port = ntohs(sh->source); + *port = sh->source; from = &skb->nh.iph->saddr; } else { - *port = ntohs(sh->dest); + *port = sh->dest; from = &skb->nh.iph->daddr; } memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr)); @@ -272,7 +250,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v4.sin_family = AF_INET; - addr->v4.sin_port = inet_sk(sk)->num; + addr->v4.sin_port = 0; addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr; } @@ -291,7 +269,7 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) /* Initialize a sctp_addr from an address parameter. */ static void sctp_v4_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, - __u16 port, int iif) + __be16 port, int iif) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; @@ -307,7 +285,7 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr, int length = sizeof(sctp_ipv4addr_param_t); param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS; - param->v4.param_hdr.length = ntohs(length); + param->v4.param_hdr.length = htons(length); param->v4.addr.s_addr = addr->v4.sin_addr.s_addr; return length; @@ -315,7 +293,7 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr, /* Initialize a sctp_addr from a dst_entry. */ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct dst_entry *dst, - unsigned short port) + __be16 port) { struct rtable *rt = (struct rtable *)dst; saddr->v4.sin_family = AF_INET; @@ -338,7 +316,7 @@ static int sctp_v4_cmp_addr(const union sctp_addr *addr1, } /* Initialize addr struct to INADDR_ANY. */ -static void sctp_v4_inaddr_any(union sctp_addr *addr, unsigned short port) +static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) { addr->v4.sin_family = AF_INET; addr->v4.sin_addr.s_addr = INADDR_ANY; @@ -481,7 +459,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list); if (!laddr->use_as_src) continue; - sctp_v4_dst_saddr(&dst_saddr, dst, bp->port); + sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } @@ -538,7 +516,7 @@ static void sctp_v4_get_saddr(struct sctp_association *asoc, if (rt) { saddr->v4.sin_family = AF_INET; - saddr->v4.sin_port = asoc->base.bind_addr.port; + saddr->v4.sin_port = htons(asoc->base.bind_addr.port); saddr->v4.sin_addr.s_addr = rt->rt_src; } } @@ -591,7 +569,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet->dport = htons(asoc->peer.port); newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; newinet->pmtudisc = inet->pmtudisc; - newinet->id = 0; + newinet->id = asoc->next_tsn ^ jiffies; newinet->uc_ttl = -1; newinet->mc_loop = 1; @@ -622,18 +600,36 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. - * Basically, whenever there is an event, we re-build our local address list. - */ +/* Event handler for inet address addition/deletion events. */ int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { - unsigned long flags; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v4.sin_family = AF_INET; + addr->a.v4.sin_port = 0; + addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } return NOTIFY_DONE; } @@ -791,7 +787,7 @@ static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr) * chunks. Returns number of addresses supported. */ static int sctp_inet_supported_addrs(const struct sctp_sock *opt, - __u16 *types) + __be16 *types) { types[0] = SCTP_PARAM_IPV4_ADDRESS; return 1; @@ -808,7 +804,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, NIPQUAD(((struct rtable *)skb->dst)->rt_dst)); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, ipfragok); + return ip_queue_xmit(skb, skb->sk, ipfragok); } static struct sctp_af sctp_ipv4_specific; @@ -1172,13 +1168,12 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); + + sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_get_local_addr_list(); - __unsafe(THIS_MODULE); status = 0; out: @@ -1263,6 +1258,7 @@ module_exit(sctp_exit); * __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly. */ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); MODULE_LICENSE("GPL"); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 507dff72c58..30927d3a597 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -65,7 +65,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -extern kmem_cache_t *sctp_chunk_cachep; +extern struct kmem_cache *sctp_chunk_cachep; SCTP_STATIC struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, @@ -111,7 +111,7 @@ static const struct sctp_paramhdr prsctp_param = { * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ -void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code, +void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, const void *payload, size_t paylen) { sctp_errhdr_t err; @@ -183,7 +183,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, int num_types, addrs_len = 0; struct sctp_sock *sp; sctp_supported_addrs_param_t sat; - __u16 types[2]; + __be16 types[2]; sctp_adaption_ind_param_t aiparam; /* RFC 2960 3.3.2 Initiation (INIT) (1) @@ -775,7 +775,7 @@ struct sctp_chunk *sctp_make_abort_no_data( const struct sctp_chunk *chunk, __u32 tsn) { struct sctp_chunk *retval; - __u32 payload; + __be32 payload; retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + sizeof(tsn)); @@ -951,7 +951,7 @@ nodata: /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, - __u16 cause_code, const void *payload, + __be16 cause_code, const void *payload, size_t paylen) { struct sctp_chunk *retval; @@ -979,7 +979,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, { struct sctp_chunk *retval; - retval = kmem_cache_alloc(sctp_chunk_cachep, SLAB_ATOMIC); + retval = kmem_cache_alloc(sctp_chunk_cachep, GFP_ATOMIC); if (!retval) goto nodata; @@ -1190,15 +1190,14 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { ssn = 0; } else { - sid = htons(chunk->subh.data_hdr->stream); + sid = ntohs(chunk->subh.data_hdr->stream); if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG) ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid); else ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid); - ssn = htons(ssn); } - chunk->subh.data_hdr->ssn = ssn; + chunk->subh.data_hdr->ssn = htons(ssn); chunk->has_ssn = 1; } @@ -1280,15 +1279,13 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, - (bodysize % SCTP_COOKIE_MULTIPLE); *cookie_len = headersize + bodysize; - retval = kmalloc(*cookie_len, GFP_ATOMIC); - - if (!retval) - goto nodata; - /* Clear this memory since we are sending this data structure * out on the network. */ - memset(retval, 0x00, *cookie_len); + retval = kzalloc(*cookie_len, GFP_ATOMIC); + if (!retval) + goto nodata; + cookie = (struct sctp_signed_cookie *) retval->body; /* Set up the parameter header. */ @@ -1438,7 +1435,7 @@ no_hmac: goto fail; } - if (ntohs(chunk->sctp_hdr->source) != bear_cookie->peer_addr.v4.sin_port || + if (chunk->sctp_hdr->source != bear_cookie->peer_addr.v4.sin_port || ntohs(chunk->sctp_hdr->dest) != bear_cookie->my_port) { *error = -SCTP_IERROR_BAD_PORTS; goto fail; @@ -1473,10 +1470,10 @@ no_hmac: suseconds_t usecs = (tv.tv_sec - bear_cookie->expiration.tv_sec) * 1000000L + tv.tv_usec - bear_cookie->expiration.tv_usec; + __be32 n = htonl(usecs); - usecs = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, - &usecs, sizeof(usecs)); + &n, sizeof(n)); *error = -SCTP_IERROR_STALE_COOKIE; } else *error = -SCTP_IERROR_NOMEM; @@ -1539,8 +1536,8 @@ malformed: ********************************************************************/ struct __sctp_missing { - __u32 num_missing; - __u16 type; + __be32 num_missing; + __be16 type; } __attribute__((packed)); /* @@ -1852,9 +1849,10 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, * added as the primary transport. The source address seems to * be a a better choice than any of the embedded addresses. */ - if (peer_addr) + if (peer_addr) { if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; + } /* Process the initialization parameters. */ @@ -1910,10 +1908,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, /* Copy cookie in case we need to resend COOKIE-ECHO. */ cookie = asoc->peer.cookie; if (cookie) { - asoc->peer.cookie = kmalloc(asoc->peer.cookie_len, gfp); + asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp); if (!asoc->peer.cookie) goto clean_up; - memcpy(asoc->peer.cookie, cookie, asoc->peer.cookie_len); } /* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily @@ -2027,7 +2024,7 @@ static int sctp_process_param(struct sctp_association *asoc, /* Fall through. */ case SCTP_PARAM_IPV4_ADDRESS: af = sctp_get_af_specific(param_type2af(param.p->type)); - af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); + af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); if (sctp_in_scope(&addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) @@ -2230,7 +2227,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, union sctp_addr *laddr, struct sockaddr *addrs, int addrcnt, - __u16 flags) + __be16 flags) { sctp_addip_param_t param; struct sctp_chunk *retval; @@ -2363,14 +2360,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as } /* Add response parameters to an ASCONF_ACK chunk. */ -static void sctp_add_asconf_response(struct sctp_chunk *chunk, __u32 crr_id, - __u16 err_code, sctp_addip_param_t *asconf_param) +static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, + __be16 err_code, sctp_addip_param_t *asconf_param) { sctp_addip_param_t ack_param; sctp_errhdr_t err_param; int asconf_param_len = 0; int err_param_len = 0; - __u16 response_type; + __be16 response_type; if (SCTP_ERROR_NO_ERROR == err_code) { response_type = SCTP_PARAM_SUCCESS_REPORT; @@ -2404,7 +2401,7 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __u32 crr_id, } /* Process a asconf parameter. */ -static __u16 sctp_process_asconf_param(struct sctp_association *asoc, +static __be16 sctp_process_asconf_param(struct sctp_association *asoc, struct sctp_chunk *asconf, sctp_addip_param_t *asconf_param) { @@ -2413,7 +2410,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc, union sctp_addr addr; struct list_head *pos; union sctp_addr_param *addr_param; - + addr_param = (union sctp_addr_param *) ((void *)asconf_param + sizeof(sctp_addip_param_t)); @@ -2421,7 +2418,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc, if (unlikely(!af)) return SCTP_ERROR_INV_PARAM; - af->from_addr_param(&addr, addr_param, asoc->peer.port, 0); + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address @@ -2487,7 +2484,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, sctp_addip_param_t *asconf_param; struct sctp_chunk *asconf_ack; - __u16 err_code; + __be16 err_code; int length = 0; int chunk_len = asconf->skb->len; __u32 serial; @@ -2586,7 +2583,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type)); - af->from_addr_param(&addr, addr_param, bp->port, 0); + af->from_addr_param(&addr, addr_param, htons(bp->port), 0); switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: @@ -2630,7 +2627,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, * All TLVs after the failed response are considered unsuccessful unless a * specific success indication is present for the parameter. */ -static __u16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, +static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, sctp_addip_param_t *asconf_param, int no_err) { @@ -2638,7 +2635,7 @@ static __u16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, sctp_errhdr_t *err_param; int length; int asconf_ack_len = asconf_ack->skb->len; - __u16 err_code; + __be16 err_code; if (no_err) err_code = SCTP_ERROR_NO_ERROR; @@ -2694,7 +2691,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, int all_param_pass = 0; int no_err = 1; int retval = 0; - __u16 err_code = SCTP_ERROR_NO_ERROR; + __be16 err_code = SCTP_ERROR_NO_ERROR; /* Skip the chunkhdr and addiphdr from the last asconf sent and store * a pointer to address parameter. diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 9c10bdec1af..7bbc6156e45 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -442,7 +442,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, " transport IP: port:%d failed.\n", asoc, (&transport->ipaddr), - transport->ipaddr.v4.sin_port); + ntohs(transport->ipaddr.v4.sin_port)); sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); @@ -1360,12 +1360,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); + sctp_cmd_init_failed(commands, asoc, cmd->obj.err); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.u32); + subtype, chunk, cmd->obj.err); break; case SCTP_CMD_INIT_COUNTER_INC: @@ -1420,7 +1420,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ - sackh.cum_tsn_ack = cmd->obj.u32; + sackh.cum_tsn_ack = cmd->obj.be32; sackh.a_rwnd = 0; sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 1c42fe983a5..27cc444aaf1 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -93,7 +93,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, - __u16 error, int sk_err, + __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport); @@ -443,7 +443,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, __u32 init_tag; struct sctp_chunk *err_chunk; struct sctp_packet *packet; - __u16 error; + sctp_error_t error; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -886,7 +886,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; @@ -2138,7 +2138,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(SCTP_ERROR_STALE_COOKIE)); + SCTP_PERR(SCTP_ERROR_STALE_COOKIE)); return SCTP_DISPOSITION_DELETE_TCB; } @@ -2158,7 +2158,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, * to give ample time to retransmit the new cookie and thus * yield a higher probability of success on the reattempt. */ - stale = ntohl(*(suseconds_t *)((u8 *)err + sizeof(sctp_errhdr_t))); + stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t))); stale = (stale * 2) / 1000; bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE; @@ -2250,7 +2250,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; unsigned len; - __u16 error = SCTP_ERROR_NO_ERROR; + __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2275,7 +2275,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_U32(error)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); @@ -2295,7 +2295,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; unsigned len; - __u16 error = SCTP_ERROR_NO_ERROR; + __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2357,7 +2357,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, * This is common code called by several sctp_sf_*_abort() functions above. */ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, - __u16 error, int sk_err, + __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport) { @@ -2370,7 +2370,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); /* CMD_INIT_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(error)); + SCTP_PERR(error)); return SCTP_DISPOSITION_ABORT; } @@ -2466,7 +2466,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, * received by the SHUTDOWN sender. */ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, - SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack)); + SCTP_BE32(chunk->subh.shutdown_hdr->cum_tsn_ack)); out: return disposition; @@ -2545,6 +2545,7 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, { sctp_cwrhdr_t *cwr; struct sctp_chunk *chunk = arg; + u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2556,14 +2557,14 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, cwr = (sctp_cwrhdr_t *) chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t)); - cwr->lowest_tsn = ntohl(cwr->lowest_tsn); + lowest_tsn = ntohl(cwr->lowest_tsn); /* Does this CWR ack the last sent congestion notification? */ - if (TSN_lte(asoc->last_ecne_tsn, cwr->lowest_tsn)) { + if (TSN_lte(asoc->last_ecne_tsn, lowest_tsn)) { /* Stop sending ECNE. */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CWR, - SCTP_U32(cwr->lowest_tsn)); + SCTP_U32(lowest_tsn)); } return SCTP_DISPOSITION_CONSUME; } @@ -3360,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_ASCONF_ACK)); + SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; @@ -3388,7 +3389,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_ASCONF_ACK)); + SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; @@ -3743,12 +3744,12 @@ static sctp_disposition_t sctp_sf_violation_chunklen( sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNREFUSED)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); } else { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); } @@ -4062,7 +4063,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( SCTP_ERROR(ECONNABORTED)); /* Delete the established association. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_USER_ABORT)); + SCTP_PERR(SCTP_ERROR_USER_ABORT)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); @@ -4199,7 +4200,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_ERROR(ECONNREFUSED)); /* Delete the established association. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(SCTP_ERROR_USER_ABORT)); + SCTP_PERR(SCTP_ERROR_USER_ABORT)); return retval; } @@ -4571,7 +4572,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; @@ -4693,7 +4694,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); return SCTP_DISPOSITION_DELETE_TCB; } @@ -4745,7 +4746,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); return SCTP_DISPOSITION_DELETE_TCB; } @@ -4781,7 +4782,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, SCTP_ERROR(ETIMEDOUT)); /* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; @@ -4859,7 +4860,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_INC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; @@ -4915,7 +4916,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_ERROR)); + SCTP_PERR(SCTP_ERROR_NO_ERROR)); return SCTP_DISPOSITION_DELETE_TCB; nomem: @@ -5365,7 +5366,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_U32(SCTP_ERROR_NO_DATA)); + SCTP_PERR(SCTP_ERROR_NO_DATA)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_IERROR_NO_DATA; diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 8bcca567615..733dd87b3a7 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -104,325 +104,322 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, }; } +#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func} + #define TYPE_SCTP_DATA { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_data_6_2), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_data_6_2), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_eat_data_fast_4_4, .name = "sctp_sf_eat_data_fast_4_4"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_data_fast_4_4), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_DATA */ #define TYPE_SCTP_INIT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_do_5_1B_init, .name = "sctp_sf_do_5_1B_init"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_1B_init), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_1_siminit), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_1_siminit), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_2_dupinit), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_do_9_2_reshutack, .name = "sctp_sf_do_9_2_reshutack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_reshutack), \ } /* TYPE_SCTP_INIT */ #define TYPE_SCTP_INIT_ACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_5_1C_ack, .name = "sctp_sf_do_5_1C_ack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_1C_ack), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_INIT_ACK */ #define TYPE_SCTP_SACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_sack_6_2), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_SACK */ #define TYPE_SCTP_HEARTBEAT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ /* This should not happen, but we are nice. */ \ - {.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_beat_8_3), \ } /* TYPE_SCTP_HEARTBEAT */ #define TYPE_SCTP_HEARTBEAT_ACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \ + TYPE_SCTP_FUNC(sctp_sf_violation), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_backbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_HEARTBEAT_ACK */ #define TYPE_SCTP_ABORT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_pdiscard, .name = "sctp_sf_pdiscard"}, \ + TYPE_SCTP_FUNC(sctp_sf_pdiscard), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_cookie_wait_abort, .name = "sctp_sf_cookie_wait_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_wait_abort), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_cookie_echoed_abort, \ - .name = "sctp_sf_cookie_echoed_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_abort), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_1_abort), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_shutdown_pending_abort, \ - .name = "sctp_sf_shutdown_pending_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_pending_abort), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_shutdown_sent_abort, \ - .name = "sctp_sf_shutdown_sent_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_sent_abort), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_1_abort), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_shutdown_ack_sent_abort, \ - .name = "sctp_sf_shutdown_ack_sent_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_ack_sent_abort), \ } /* TYPE_SCTP_ABORT */ #define TYPE_SCTP_SHUTDOWN { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_9_2_shutdown, .name = "sctp_sf_do_9_2_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_9_2_shutdown_ack, \ - .name = "sctp_sf_do_9_2_shutdown_ack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_SHUTDOWN */ #define TYPE_SCTP_SHUTDOWN_ACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_8_5_1_E_sa), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_8_5_1_E_sa), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \ + TYPE_SCTP_FUNC(sctp_sf_violation), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \ + TYPE_SCTP_FUNC(sctp_sf_violation), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_final), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \ + TYPE_SCTP_FUNC(sctp_sf_violation), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_final), \ } /* TYPE_SCTP_SHUTDOWN_ACK */ #define TYPE_SCTP_ERROR { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_cookie_echoed_err, .name = "sctp_sf_cookie_echoed_err"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_err), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \ + TYPE_SCTP_FUNC(sctp_sf_operr_notify), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \ + TYPE_SCTP_FUNC(sctp_sf_operr_notify), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \ + TYPE_SCTP_FUNC(sctp_sf_operr_notify), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_ERROR */ #define TYPE_SCTP_COOKIE_ECHO { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_do_5_1D_ce, .name = "sctp_sf_do_5_1D_ce"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_1D_ce), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_4_dupcook), \ } /* TYPE_SCTP_COOKIE_ECHO */ #define TYPE_SCTP_COOKIE_ACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_5_1E_ca, .name = "sctp_sf_do_5_1E_ca"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_5_1E_ca), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_COOKIE_ACK */ #define TYPE_SCTP_ECN_ECNE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecne), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecne), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecne), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecne), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecne), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_ECN_ECNE */ #define TYPE_SCTP_ECN_CWR { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_ecn_cwr), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_ECN_CWR */ #define TYPE_SCTP_SHUTDOWN_COMPLETE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_do_4_C, .name = "sctp_sf_do_4_C"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_4_C), \ } /* TYPE_SCTP_SHUTDOWN_COMPLETE */ /* The primary index for this table is the chunk type. @@ -450,44 +447,44 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][ #define TYPE_SCTP_ASCONF { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_asconf, .name = "sctp_sf_do_asconf"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_asconf), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_ASCONF */ #define TYPE_SCTP_ASCONF_ACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_asconf_ack, .name = "sctp_sf_do_asconf_ack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_ASCONF_ACK */ /* The primary index for this table is the chunk type. @@ -500,23 +497,23 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_ #define TYPE_SCTP_FWD_TSN { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \ + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_eat_fwd_tsn_fast, .name = "sctp_sf_eat_fwd_tsn_fast"}, \ + TYPE_SCTP_FUNC(sctp_sf_eat_fwd_tsn_fast), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_FWD_TSN */ /* The primary index for this table is the chunk type. @@ -529,167 +526,150 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN static const sctp_sm_table_entry_t chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_EMPTY */ - {.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, + TYPE_SCTP_FUNC(sctp_sf_ootb), /* SCTP_STATE_CLOSED */ - {.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, + TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), /* SCTP_STATE_COOKIE_WAIT */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_COOKIE_ECHOED */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_ESTABLISHED */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_SHUTDOWN_PENDING */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_SHUTDOWN_SENT */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_SHUTDOWN_RECEIVED */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_SHUTDOWN_ACK_SENT */ - {.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"}, + TYPE_SCTP_FUNC(sctp_sf_unk_chunk), }; /* chunk unknown */ #define TYPE_SCTP_PRIMITIVE_ASSOCIATE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_do_prm_asoc, .name = "sctp_sf_do_prm_asoc"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_asoc), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \ + TYPE_SCTP_FUNC(sctp_sf_not_impl), \ } /* TYPE_SCTP_PRIMITIVE_ASSOCIATE */ #define TYPE_SCTP_PRIMITIVE_SHUTDOWN { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_cookie_wait_prm_shutdown, \ - .name = "sctp_sf_cookie_wait_prm_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_wait_prm_shutdown), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_cookie_echoed_prm_shutdown, \ - .name = "sctp_sf_cookie_echoed_prm_shutdown"},\ + TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_prm_shutdown),\ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_9_2_prm_shutdown, \ - .name = "sctp_sf_do_9_2_prm_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_prm_shutdown), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_primitive), \ } /* TYPE_SCTP_PRIMITIVE_SHUTDOWN */ #define TYPE_SCTP_PRIMITIVE_ABORT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_cookie_wait_prm_abort, \ - .name = "sctp_sf_cookie_wait_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_wait_prm_abort), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_cookie_echoed_prm_abort, \ - .name = "sctp_sf_cookie_echoed_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_echoed_prm_abort), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_9_1_prm_abort, \ - .name = "sctp_sf_do_9_1_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_1_prm_abort), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_shutdown_pending_prm_abort, \ - .name = "sctp_sf_shutdown_pending_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_pending_prm_abort), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_shutdown_sent_prm_abort, \ - .name = "sctp_sf_shutdown_sent_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_sent_prm_abort), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_9_1_prm_abort, \ - .name = "sctp_sf_do_9_1_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_1_prm_abort), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_shutdown_ack_sent_prm_abort, \ - .name = "sctp_sf_shutdown_ack_sent_prm_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_shutdown_ack_sent_prm_abort), \ } /* TYPE_SCTP_PRIMITIVE_ABORT */ #define TYPE_SCTP_PRIMITIVE_SEND { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_send), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ } /* TYPE_SCTP_PRIMITIVE_SEND */ #define TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_do_prm_requestheartbeat, \ - .name = "sctp_sf_do_prm_requestheartbeat"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_requestheartbeat), \ } /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */ #define TYPE_SCTP_PRIMITIVE_ASCONF { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_closed), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_prm_asconf, .name = "sctp_sf_do_prm_asconf"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ } /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */ /* The primary index for this table is the primitive type. @@ -706,47 +686,44 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE #define TYPE_SCTP_OTHER_NO_PENDING_TSN { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_9_2_start_shutdown, \ - .name = "sctp_do_9_2_start_shutdown"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_start_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_9_2_shutdown_ack, \ - .name = "sctp_sf_do_9_2_shutdown_ack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ } #define TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_cookie_wait_icmp_abort, \ - .name = "sctp_sf_cookie_wait_icmp_abort"}, \ + TYPE_SCTP_FUNC(sctp_sf_cookie_wait_icmp_abort), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \ + TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ } static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = { @@ -756,215 +733,212 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ #define TYPE_SCTP_EVENT_TIMEOUT_NONE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_t1_cookie_timer_expire, \ - .name = "sctp_sf_t1_cookie_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t1_cookie_timer_expire), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T1_INIT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_t1_init_timer_expire, \ - .name = "sctp_sf_t1_init_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t1_init_timer_expire), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t2_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t2_timer_expire), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T3_RTX { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_3_3_rtx), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T4_RTO { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_t4_timer_expire, .name = "sctp_sf_t4_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t4_timer_expire), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \ + TYPE_SCTP_FUNC(sctp_sf_sendbeat_8_3), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_SACK { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ + TYPE_SCTP_FUNC(sctp_sf_bug), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \ + TYPE_SCTP_FUNC(sctp_sf_do_6_2_sack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } #define TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE { \ /* SCTP_STATE_EMPTY */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_CLOSED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_WAIT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_COOKIE_ECHOED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_ESTABLISHED */ \ - {.fn = sctp_sf_autoclose_timer_expire, \ - .name = "sctp_sf_autoclose_timer_expire"}, \ + TYPE_SCTP_FUNC(sctp_sf_autoclose_timer_expire), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ - {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9f34dec6ff8..bdd8bd428b6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern kmem_cache_t *sctp_bucket_cachep; +extern struct kmem_cache *sctp_bucket_cachep; /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) @@ -229,11 +229,9 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, struct sctp_transport *transport; union sctp_addr *laddr = (union sctp_addr *)addr; - laddr->v4.sin_port = ntohs(laddr->v4.sin_port); addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, - (union sctp_addr *)addr, + laddr, &transport); - laddr->v4.sin_port = htons(laddr->v4.sin_port); if (!addr_asoc) return NULL; @@ -368,9 +366,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) sctp_write_lock(&ep->base.addr_lock); /* Use GFP_ATOMIC since BHs are disabled. */ - addr->v4.sin_port = ntohs(addr->v4.sin_port); ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); - addr->v4.sin_port = htons(addr->v4.sin_port); sctp_write_unlock(&ep->base.addr_lock); sctp_local_bh_enable(); @@ -572,7 +568,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); - saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); retval = sctp_add_bind_addr(bp, &saveaddr, 0, GFP_ATOMIC); addr_buf += af->sockaddr_len; @@ -607,9 +602,8 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) int cnt; struct sctp_bind_addr *bp = &ep->base.bind_addr; int retval = 0; - union sctp_addr saveaddr; void *addr_buf; - struct sockaddr *sa_addr; + union sctp_addr *sa_addr; struct sctp_af *af; SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n", @@ -627,19 +621,13 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) goto err_bindx_rem; } - /* The list may contain either IPv4 or IPv6 address; - * determine the address length to copy the address to - * saveaddr. - */ - sa_addr = (struct sockaddr *)addr_buf; - af = sctp_get_af_specific(sa_addr->sa_family); + sa_addr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(sa_addr->sa.sa_family); if (!af) { retval = -EINVAL; goto err_bindx_rem; } - memcpy(&saveaddr, sa_addr, af->sockaddr_len); - saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); - if (saveaddr.v4.sin_port != bp->port) { + if (sa_addr->v4.sin_port != htons(bp->port)) { retval = -EINVAL; goto err_bindx_rem; } @@ -654,7 +642,7 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) sctp_local_bh_disable(); sctp_write_lock(&ep->base.addr_lock); - retval = sctp_del_bind_addr(bp, &saveaddr); + retval = sctp_del_bind_addr(bp, sa_addr); sctp_write_unlock(&ep->base.addr_lock); sctp_local_bh_enable(); @@ -693,7 +681,6 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_bind_addr *bp; struct sctp_chunk *chunk; union sctp_addr *laddr; - union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; struct list_head *pos, *pos1; @@ -773,13 +760,11 @@ static int sctp_send_asconf_del_ip(struct sock *sk, for (i = 0; i < addrcnt; i++) { laddr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); - memcpy(&saveaddr, laddr, af->sockaddr_len); - saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); list_for_each(pos1, &bp->address_list) { saddr = list_entry(pos1, struct sctp_sockaddr_entry, list); - if (sctp_cmp_addr_exact(&saddr->a, &saveaddr)) + if (sctp_cmp_addr_exact(&saddr->a, laddr)) saddr->use_as_src = 0; } addr_buf += af->sockaddr_len; @@ -979,7 +964,7 @@ static int __sctp_connect(struct sock* sk, int err = 0; int addrcnt = 0; int walk_size = 0; - struct sockaddr *sa_addr; + union sctp_addr *sa_addr; void *addr_buf; sp = sctp_sk(sk); @@ -999,8 +984,8 @@ static int __sctp_connect(struct sock* sk, /* Walk through the addrs buffer and count the number of addresses. */ addr_buf = kaddrs; while (walk_size < addrs_size) { - sa_addr = (struct sockaddr *)addr_buf; - af = sctp_get_af_specific(sa_addr->sa_family); + sa_addr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(sa_addr->sa.sa_family); /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. @@ -1010,18 +995,16 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - err = sctp_verify_addr(sk, (union sctp_addr *)sa_addr, - af->sockaddr_len); + err = sctp_verify_addr(sk, sa_addr, af->sockaddr_len); if (err) goto out_free; memcpy(&to, sa_addr, af->sockaddr_len); - to.v4.sin_port = ntohs(to.v4.sin_port); /* Check if there already is a matching association on the * endpoint (other than the one created here). */ - asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport); + asoc2 = sctp_endpoint_lookup_assoc(ep, sa_addr, &transport); if (asoc2 && asoc2 != asoc) { if (asoc2->state >= SCTP_STATE_ESTABLISHED) err = -EISCONN; @@ -1034,7 +1017,7 @@ static int __sctp_connect(struct sock* sk, * make sure that there is no peeled-off association matching * the peer address even on another socket. */ - if (sctp_endpoint_is_peeled_off(ep, &to)) { + if (sctp_endpoint_is_peeled_off(ep, sa_addr)) { err = -EADDRNOTAVAIL; goto out_free; } @@ -1065,7 +1048,7 @@ static int __sctp_connect(struct sock* sk, } } - scope = sctp_scope(&to); + scope = sctp_scope(sa_addr); asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); if (!asoc) { err = -ENOMEM; @@ -1074,7 +1057,7 @@ static int __sctp_connect(struct sock* sk, } /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, + transport = sctp_assoc_add_peer(asoc, sa_addr, GFP_KERNEL, SCTP_UNKNOWN); if (!transport) { err = -ENOMEM; @@ -1427,11 +1410,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (msg_namelen > sizeof(to)) msg_namelen = sizeof(to); memcpy(&to, msg->msg_name, msg_namelen); - SCTP_DEBUG_PRINTK("Just memcpy'd. msg_name is " - "0x%x:%u.\n", - to.v4.sin_addr.s_addr, to.v4.sin_port); - - to.v4.sin_port = ntohs(to.v4.sin_port); msg_name = msg->msg_name; } @@ -2768,6 +2746,46 @@ static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval, return 0; } +/* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * + * The context field in the sctp_sndrcvinfo structure is normally only + * used when a failed message is retrieved holding the value that was + * sent down on the actual send call. This option allows the setting of + * a default context on an association basis that will be received on + * reading messages from the peer. This is especially helpful in the + * one-2-many model for an application to keep some reference to an + * internal state machine that is processing messages on the + * association. Note that the setting of this value only effects + * received messages from the peer and does not effect the value that is + * saved with outbound messages. + */ +static int sctp_setsockopt_context(struct sock *sk, char __user *optval, + int optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_assoc_value)) + return -EINVAL; + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + asoc->default_rcv_context = params.assoc_value; + } else { + sp->default_rcv_context = params.assoc_value; + } + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -2879,6 +2897,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ADAPTION_LAYER: retval = sctp_setsockopt_adaption_layer(sk, optval, optlen); break; + case SCTP_CONTEXT: + retval = sctp_setsockopt_context(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; @@ -3038,6 +3059,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_context = 0; sp->default_timetolive = 0; + sp->default_rcv_context = 0; + /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. @@ -3217,8 +3240,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_outstrms = asoc->c.sinit_num_ostreams; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); - memcpy(&status.sstat_primary.spinfo_address, - &(transport->ipaddr), sizeof(union sctp_addr)); + memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, + transport->af_specific->sockaddr_len); /* Map ipv4 address into v4-mapped-on-v6 address. */ sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), (union sctp_addr *)&status.sstat_primary.spinfo_address); @@ -3372,6 +3395,7 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, { struct sock *sk = asoc->base.sk; struct socket *sock; + struct inet_sock *inetsk; int err = 0; /* An association cannot be branched off from an already peeled-off @@ -3389,6 +3413,14 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, * asoc to the newsk. */ sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + + /* Make peeled-off sockets more like 1-1 accepted sockets. + * Set the daddr and initialize id to something more random + */ + inetsk = inet_sk(sock->sk); + inetsk->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + inetsk->id = asoc->next_tsn ^ jiffies; + *sockp = sock; return err; @@ -3761,7 +3793,6 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - temp.v4.sin_port = htons(temp.v4.sin_port); if (copy_to_user(to, &temp, addrlen)) return -EFAULT; to += addrlen ; @@ -3812,7 +3843,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; if(space_left < addrlen) return -ENOMEM; - temp.v4.sin_port = htons(temp.v4.sin_port); if (copy_to_user(to, &temp, addrlen)) return -EFAULT; to += addrlen; @@ -3836,10 +3866,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; + struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; rwlock_t *addr_lock; - unsigned long flags; int cnt = 0; if (len != sizeof(sctp_assoc_t)) @@ -3874,18 +3903,15 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) + (AF_INET6 == addr->a.sa.sa_family)) continue; cnt++; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); } else { cnt = 1; } @@ -3907,15 +3933,13 @@ done: static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, void __user *to) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3924,17 +3948,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - temp.v4.sin_port = htons(port); - if (copy_to_user(to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(to, &temp, addrlen)) return -EFAULT; - } + to += addrlen; cnt ++; if (cnt >= max_addrs) break; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -3942,15 +3962,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, void __user **to, size_t space_left) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3961,17 +3979,13 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if(space_left<addrlen) return -ENOMEM; - temp.v4.sin_port = htons(port); - if (copy_to_user(*to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(*to, &temp, addrlen)) return -EFAULT; - } + *to += addrlen; cnt ++; space_left -= addrlen; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -4046,7 +4060,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - temp.v4.sin_port = htons(temp.v4.sin_port); if (copy_to_user(to, &temp, addrlen)) { err = -EFAULT; goto unlock; @@ -4137,7 +4150,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if(space_left < addrlen) return -ENOMEM; /*fixme: right error?*/ - temp.v4.sin_port = htons(temp.v4.sin_port); if (copy_to_user(to, &temp, addrlen)) { err = -EFAULT; goto unlock; @@ -4185,12 +4197,8 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, if (!asoc->peer.primary_path) return -ENOTCONN; - asoc->peer.primary_path->ipaddr.v4.sin_port = - htons(asoc->peer.primary_path->ipaddr.v4.sin_port); memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr, - sizeof(union sctp_addr)); - asoc->peer.primary_path->ipaddr.v4.sin_port = - ntohs(asoc->peer.primary_path->ipaddr.v4.sin_port); + asoc->peer.primary_path->af_specific->sockaddr_len); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, (union sctp_addr *)&prim.ssp_addr); @@ -4458,6 +4466,42 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len, } /* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * (chapter and verse is quoted at sctp_setsockopt_context()) + */ +static int sctp_getsockopt_context(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (len != sizeof(struct sctp_assoc_value)) + return -EINVAL; + + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + params.assoc_value = asoc->default_rcv_context; + } else { + params.assoc_value = sp->default_rcv_context; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + + return 0; +} + +/* * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG) * * This socket option specifies the maximum size to put in any outgoing @@ -4595,6 +4639,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_adaption_layer(sk, len, optval, optlen); break; + case SCTP_CONTEXT: + retval = sctp_getsockopt_context(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4636,9 +4683,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) unsigned short snum; int ret; - /* NOTE: Remember to put this back to net order. */ - addr->v4.sin_port = ntohs(addr->v4.sin_port); - snum = addr->v4.sin_port; + snum = ntohs(addr->v4.sin_port); SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); sctp_local_bh_disable(); @@ -4775,7 +4820,6 @@ fail_unlock: fail: sctp_local_bh_enable(); - addr->v4.sin_port = htons(addr->v4.sin_port); return ret; } @@ -5015,7 +5059,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( { struct sctp_bind_bucket *pp; - pp = kmem_cache_alloc(sctp_bucket_cachep, SLAB_ATOMIC); + pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC); SCTP_DBG_OBJCNT_INC(bind_bucket); if (pp) { pp->port = snum; @@ -5074,7 +5118,7 @@ static int sctp_autobind(struct sock *sk) { union sctp_addr autoaddr; struct sctp_af *af; - unsigned short port; + __be16 port; /* Initialize a local sockaddr structure to INADDR_ANY. */ af = sctp_sk(sk)->pf->af; diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index ac4fae161bc..42d9498c64f 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -401,13 +401,14 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { + __u16 start, end; sctp_tsnmap_iter_init(map, &iter); while (sctp_tsnmap_next_gap_ack(map, &iter, - &map->gabs[gabs].start, - &map->gabs[gabs].end)) { + &start, + &end)) { - map->gabs[gabs].start = htons(map->gabs[gabs].start); - map->gabs[gabs].end = htons(map->gabs[gabs].end); + map->gabs[gabs].start = htons(start); + map->gabs[gabs].end = htons(end); gabs++; if (gabs >= SCTP_MAX_GABS) break; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index a015283a908..93ac63b055b 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -351,7 +351,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( struct sctp_remote_error *sre; struct sk_buff *skb; sctp_errhdr_t *ch; - __u16 cause; + __be16 cause; int elen; ch = (sctp_errhdr_t *)(chunk->skb->data); @@ -849,8 +849,10 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); + /* context value that is set via SCTP_CONTEXT socket option. */ + sinfo.sinfo_context = event->asoc->default_rcv_context; + /* These fields are not used while receiving. */ - sinfo.sinfo_context = 0; sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, diff --git a/net/socket.c b/net/socket.c index 6c9b9b326d7..4e396312f8d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,7 +77,6 @@ #include <linux/cache.h> #include <linux/module.h> #include <linux/highmem.h> -#include <linux/divert.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -231,13 +230,13 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t *sock_inode_cachep __read_mostly; +static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); @@ -258,7 +257,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -306,7 +305,14 @@ static struct file_system_type sock_fs_type = { static int sockfs_delete_dentry(struct dentry *dentry) { - return 1; + /* + * At creation time, we pretended this dentry was hashed + * (by clearing DCACHE_UNHASHED bit in d_flags) + * At delete time, we restore the truth : not hashed. + * (so that dput() can proceed correctly) + */ + dentry->d_flags |= DCACHE_UNHASHED; + return 0; } static struct dentry_operations sockfs_dentry_operations = { .d_delete = sockfs_delete_dentry, @@ -354,16 +360,22 @@ static int sock_attach_fd(struct socket *sock, struct file *file) this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); this.name = name; - this.hash = SOCK_INODE(sock)->i_ino; + this.hash = 0; - file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); - if (unlikely(!file->f_dentry)) + file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); + if (unlikely(!file->f_path.dentry)) return -ENOMEM; - file->f_dentry->d_op = &sockfs_dentry_operations; - d_add(file->f_dentry, SOCK_INODE(sock)); - file->f_vfsmnt = mntget(sock_mnt); - file->f_mapping = file->f_dentry->d_inode->i_mapping; + file->f_path.dentry->d_op = &sockfs_dentry_operations; + /* + * We dont want to push this dentry into global dentry hash table. + * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED + * This permits a working /proc/$pid/fd/XXX on sockets + */ + file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); + file->f_path.mnt = mntget(sock_mnt); + file->f_mapping = file->f_path.dentry->d_inode->i_mapping; sock->file = file; file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; @@ -401,7 +413,7 @@ static struct socket *sock_from_file(struct file *file, int *err) if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ - inode = file->f_dentry->d_inode; + inode = file->f_path.dentry->d_inode; if (!S_ISSOCK(inode->i_mode)) { *err = -ENOTSOCK; return NULL; @@ -852,11 +864,6 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = vlan_ioctl_hook(argp); mutex_unlock(&vlan_ioctl_mutex); break; - case SIOCGIFDIVERT: - case SIOCSIFDIVERT: - /* Convert this to call through a hook */ - err = divert_ioctl(cmd, argp); - break; case SIOCADDDLCI: case SIOCDELDLCI: err = -ENOPKG; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b36b9463f5a..e1a104abb78 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -68,7 +68,7 @@ static struct rpc_credops gss_credops; #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ -#define GSS_VERF_SLACK 56 +#define GSS_VERF_SLACK 100 /* XXX this define must match the gssd define * as it is passed to gssd to signal the use of @@ -94,46 +94,6 @@ struct gss_auth { static void gss_destroy_ctx(struct gss_cl_ctx *); static struct rpc_pipe_ops gss_upcall_ops; -void -print_hexl(u32 *p, u_int length, u_int offset) -{ - u_int i, j, jm; - u8 c, *cp; - - dprintk("RPC: print_hexl: length %d\n",length); - dprintk("\n"); - cp = (u8 *) p; - - for (i = 0; i < length; i += 0x10) { - dprintk(" %04x: ", (u_int)(i + offset)); - jm = length - i; - jm = jm > 16 ? 16 : jm; - - for (j = 0; j < jm; j++) { - if ((j % 2) == 1) - dprintk("%02x ", (u_int)cp[i+j]); - else - dprintk("%02x", (u_int)cp[i+j]); - } - for (; j < 16; j++) { - if ((j % 2) == 1) - dprintk(" "); - else - dprintk(" "); - } - dprintk(" "); - - for (j = 0; j < jm; j++) { - c = cp[i+j]; - c = isprint(c) ? c : '.'; - dprintk("%c", c); - } - dprintk("\n"); - } -} - -EXPORT_SYMBOL(print_hexl); - static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) { @@ -198,11 +158,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmalloc(len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_KERNEL); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; - memcpy(dest->data, p, len); return q; } @@ -542,7 +501,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(filp->f_dentry->d_inode)->private; + clnt = RPC_I(filp->f_path.dentry->d_inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index e11a40b25cc..d926cda8862 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -43,6 +43,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/sunrpc/gss_krb5.h> +#include <linux/sunrpc/xdr.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -61,9 +62,6 @@ krb5_encrypt( u8 local_iv[16] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; - dprintk("RPC: krb5_encrypt: input data:\n"); - print_hexl((u32 *)in, length, 0); - if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; @@ -80,12 +78,9 @@ krb5_encrypt( sg_set_buf(sg, out, length); ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); - - dprintk("RPC: krb5_encrypt: output data:\n"); - print_hexl((u32 *)out, length, 0); out: dprintk("RPC: krb5_encrypt returns %d\n",ret); - return(ret); + return ret; } EXPORT_SYMBOL(krb5_encrypt); @@ -103,9 +98,6 @@ krb5_decrypt( u8 local_iv[16] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; - dprintk("RPC: krb5_decrypt: input data:\n"); - print_hexl((u32 *)in, length, 0); - if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; @@ -121,83 +113,14 @@ krb5_decrypt( sg_set_buf(sg, out, length); ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); - - dprintk("RPC: krb5_decrypt: output_data:\n"); - print_hexl((u32 *)out, length, 0); out: dprintk("RPC: gss_k5decrypt returns %d\n",ret); - return(ret); + return ret; } EXPORT_SYMBOL(krb5_decrypt); static int -process_xdr_buf(struct xdr_buf *buf, int offset, int len, - int (*actor)(struct scatterlist *, void *), void *data) -{ - int i, page_len, thislen, page_offset, ret = 0; - struct scatterlist sg[1]; - - if (offset >= buf->head[0].iov_len) { - offset -= buf->head[0].iov_len; - } else { - thislen = buf->head[0].iov_len - offset; - if (thislen > len) - thislen = len; - sg_set_buf(sg, buf->head[0].iov_base + offset, thislen); - ret = actor(sg, data); - if (ret) - goto out; - offset = 0; - len -= thislen; - } - if (len == 0) - goto out; - - if (offset >= buf->page_len) { - offset -= buf->page_len; - } else { - page_len = buf->page_len - offset; - if (page_len > len) - page_len = len; - len -= page_len; - page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); - i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - page_offset; - do { - if (thislen > page_len) - thislen = page_len; - sg->page = buf->pages[i]; - sg->offset = page_offset; - sg->length = thislen; - ret = actor(sg, data); - if (ret) - goto out; - page_len -= thislen; - i++; - page_offset = 0; - thislen = PAGE_CACHE_SIZE; - } while (page_len != 0); - offset = 0; - } - if (len == 0) - goto out; - - if (offset < buf->tail[0].iov_len) { - thislen = buf->tail[0].iov_len - offset; - if (thislen > len) - thislen = len; - sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen); - ret = actor(sg, data); - len -= thislen; - } - if (len != 0) - ret = -EINVAL; -out: - return ret; -} - -static int checksummer(struct scatterlist *sg, void *data) { struct hash_desc *desc = data; @@ -207,23 +130,13 @@ checksummer(struct scatterlist *sg, void *data) /* checksum the plaintext data and hdrlen bytes of the token header */ s32 -make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, +make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum) { - char *cksumname; struct hash_desc desc; /* XXX add to ctx? */ struct scatterlist sg[1]; int err; - switch (cksumtype) { - case CKSUMTYPE_RSA_MD5: - cksumname = "md5"; - break; - default: - dprintk("RPC: krb5_make_checksum:" - " unsupported checksum %d", cksumtype); - return GSS_S_FAILURE; - } desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; @@ -237,7 +150,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, err = crypto_hash_update(&desc, sg, hdrlen); if (err) goto out; - err = process_xdr_buf(body, body_offset, body->len - body_offset, + err = xdr_process_buf(body, body_offset, body->len - body_offset, checksummer, &desc); if (err) goto out; @@ -335,7 +248,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.fragno = 0; desc.fraglen = 0; - ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); + ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); return ret; } @@ -401,7 +314,7 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; - return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); + return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); } EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 325e72e4fd3..05d4bee86fc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,10 +70,9 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmalloc(len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_KERNEL); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); - memcpy(res->data, p, len); res->len = len; return q; } @@ -130,6 +129,7 @@ gss_import_sec_context_kerberos(const void *p, { const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; + int tmp; if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; @@ -137,18 +137,23 @@ gss_import_sec_context_kerberos(const void *p, p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init)); - if (IS_ERR(p)) + /* The downcall format was designed before we completely understood + * the uses of the context fields; so it includes some stuff we + * just give some minimal sanity-checking, and some we ignore + * completely (like the next twenty bytes): */ + if (unlikely(p + 20 > end || p + 20 < p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed)); + p += 20; + p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg)); - if (IS_ERR(p)) + if (tmp != SGN_ALG_DES_MAC_MD5) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg)); + p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) goto out_err_free_ctx; + if (tmp != SEAL_ALG_DES) + goto out_err_free_ctx; p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); if (IS_ERR(p)) goto out_err_free_ctx; diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 08601ee4cd7..d0bb5064f8c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -77,7 +77,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, struct xdr_netobj *token) { struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; - s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; unsigned char *ptr, *krb5_hdr, *msg_start; @@ -88,21 +87,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - switch (ctx->signalg) { - case SGN_ALG_DES_MAC_MD5: - checksum_type = CKSUMTYPE_RSA_MD5; - break; - default: - dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" - " supported\n", ctx->signalg); - goto out_err; - } - if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { - dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", - ctx->sealalg); - goto out_err; - } - token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; @@ -115,37 +99,26 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); memset(krb5_hdr + 4, 0xff, 4); - if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) - goto out_err; - - switch (ctx->signalg) { - case SGN_ALG_DES_MAC_MD5: - if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - goto out_err; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); - - dprintk("RPC: make_seal_token: cksum data: \n"); - print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); - break; - default: - BUG(); - } + if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum)) + return GSS_S_FAILURE; + + if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + return GSS_S_FAILURE; + + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); - if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, krb5_hdr + 16, krb5_hdr + 8))) - goto out_err; + if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, + ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)) + return GSS_S_FAILURE; - return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); -out_err: - return GSS_S_FAILURE; + return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 0828cf64100..87f8977ccec 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -78,7 +78,6 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; int signalg; int sealalg; - s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; @@ -86,96 +85,54 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, s32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; - u32 ret = GSS_S_DEFECTIVE_TOKEN; dprintk("RPC: krb5_read_token\n"); if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, read_token->len)) - goto out; + return GSS_S_DEFECTIVE_TOKEN; if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) - goto out; + return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ - /* get the sign and seal algorithms */ - signalg = ptr[0] + (ptr[1] << 8); - sealalg = ptr[2] + (ptr[3] << 8); + if (signalg != SGN_ALG_DES_MAC_MD5) + return GSS_S_DEFECTIVE_TOKEN; - /* Sanity checks */ + sealalg = ptr[2] + (ptr[3] << 8); + if (sealalg != SEAL_ALG_NONE) + return GSS_S_DEFECTIVE_TOKEN; if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) - goto out; - - if (sealalg != 0xffff) - goto out; - - /* there are several mappings of seal algorithms to sign algorithms, - but few enough that we can try them all. */ - - if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) || - (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || - (ctx->sealalg == SEAL_ALG_DES3KD && - signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) - goto out; - - /* compute the checksum of the message */ - - /* initialize the the cksum */ - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - checksum_type = CKSUMTYPE_RSA_MD5; - break; - default: - ret = GSS_S_DEFECTIVE_TOKEN; - goto out; - } - - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - ret = make_checksum(checksum_type, ptr - 2, 8, - message_buffer, 0, &md5cksum); - if (ret) - goto out; - - ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, 16); - if (ret) - goto out; - - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { - ret = GSS_S_BAD_SIG; - goto out; - } - break; - default: - ret = GSS_S_DEFECTIVE_TOKEN; - goto out; - } + return GSS_S_DEFECTIVE_TOKEN; + + if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum)) + return GSS_S_FAILURE; + + if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) + return GSS_S_FAILURE; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ now = get_seconds(); - ret = GSS_S_CONTEXT_EXPIRED; if (now > ctx->endtime) - goto out; + return GSS_S_CONTEXT_EXPIRED; /* do sequencing checks */ - ret = GSS_S_BAD_SIG; - if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum))) - goto out; + if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum)) + return GSS_S_FAILURE; if ((ctx->initiate && direction != 0xff) || (!ctx->initiate && direction != 0)) - goto out; + return GSS_S_BAD_SIG; - ret = GSS_S_COMPLETE; -out: - return ret; + return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index cc45c1605f8..fe25b3d898d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -57,9 +57,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) >>PAGE_CACHE_SHIFT; int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); - ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); + ptr = kmap_atomic(buf->pages[last], KM_USER0); pad = *(ptr + offset); - kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); + kunmap_atomic(ptr, KM_USER0); goto out; } else len -= buf->page_len; @@ -120,7 +120,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf, struct page **pages) { struct krb5_ctx *kctx = ctx->internal_ctx_id; - s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; int blocksize = 0, plainlen; @@ -134,21 +133,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, now = get_seconds(); - switch (kctx->signalg) { - case SGN_ALG_DES_MAC_MD5: - checksum_type = CKSUMTYPE_RSA_MD5; - break; - default: - dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" - " supported\n", kctx->signalg); - goto out_err; - } - if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { - dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", - kctx->sealalg); - goto out_err; - } - blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); @@ -175,37 +159,27 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); - *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); memset(krb5_hdr + 4, 0xff, 4); - *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES); make_confounder(msg_start, blocksize); /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum(checksum_type, krb5_hdr, 8, buf, + if (make_checksum("md5", krb5_hdr, 8, buf, offset + headlen - blocksize, &md5cksum)) - goto out_err; + return GSS_S_FAILURE; buf->pages = tmp_pages; - switch (kctx->signalg) { - case SGN_ALG_DES_MAC_MD5: - if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - goto out_err; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); - - dprintk("RPC: make_seal_token: cksum data: \n"); - print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); - break; - default: - BUG(); - } + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + return GSS_S_FAILURE; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; @@ -215,15 +189,13 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, * and encrypt at the same time: */ if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, seq_send, krb5_hdr + 16, krb5_hdr + 8))) - goto out_err; + return GSS_S_FAILURE; if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, pages)) - goto out_err; + return GSS_S_FAILURE; - return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); -out_err: - return GSS_S_FAILURE; + return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; } u32 @@ -232,7 +204,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) struct krb5_ctx *kctx = ctx->internal_ctx_id; int signalg; int sealalg; - s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; @@ -240,7 +211,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) s32 seqnum; unsigned char *ptr; int bodysize; - u32 ret = GSS_S_DEFECTIVE_TOKEN; void *data_start, *orig_start; int data_len; int blocksize; @@ -250,98 +220,58 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) ptr = (u8 *)buf->head[0].iov_base + offset; if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, buf->len - offset)) - goto out; + return GSS_S_DEFECTIVE_TOKEN; if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) - goto out; + return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); - sealalg = ptr[2] + (ptr[3] << 8); + if (signalg != SGN_ALG_DES_MAC_MD5) + return GSS_S_DEFECTIVE_TOKEN; - /* Sanity checks */ + sealalg = ptr[2] + (ptr[3] << 8); + if (sealalg != SEAL_ALG_DES) + return GSS_S_DEFECTIVE_TOKEN; if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) - goto out; - - if (sealalg == 0xffff) - goto out; - - /* in the current spec, there is only one valid seal algorithm per - key type, so a simple comparison is ok */ - - if (sealalg != kctx->sealalg) - goto out; - - /* there are several mappings of seal algorithms to sign algorithms, - but few enough that we can try them all. */ - - if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || - (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || - (kctx->sealalg == SEAL_ALG_DES3KD && - signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) - goto out; + return GSS_S_DEFECTIVE_TOKEN; if (gss_decrypt_xdr_buf(kctx->enc, buf, ptr + 22 - (unsigned char *)buf->head[0].iov_base)) - goto out; + return GSS_S_DEFECTIVE_TOKEN; - /* compute the checksum of the message */ + if (make_checksum("md5", ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) + return GSS_S_FAILURE; - /* initialize the the cksum */ - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - checksum_type = CKSUMTYPE_RSA_MD5; - break; - default: - ret = GSS_S_DEFECTIVE_TOKEN; - goto out; - } - - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - ret = make_checksum(checksum_type, ptr - 2, 8, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); - if (ret) - goto out; - - ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len); - if (ret) - goto out; - - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { - ret = GSS_S_BAD_SIG; - goto out; - } - break; - default: - ret = GSS_S_DEFECTIVE_TOKEN; - goto out; - } + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + return GSS_S_FAILURE; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ now = get_seconds(); - ret = GSS_S_CONTEXT_EXPIRED; if (now > kctx->endtime) - goto out; + return GSS_S_CONTEXT_EXPIRED; /* do sequencing checks */ - ret = GSS_S_BAD_SIG; - if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum))) - goto out; + if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum)) + return GSS_S_BAD_SIG; if ((kctx->initiate && direction != 0xff) || (!kctx->initiate && direction != 0)) - goto out; + return GSS_S_BAD_SIG; /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ @@ -354,11 +284,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= (data_start - orig_start); buf->len -= (data_start - orig_start); - ret = GSS_S_DEFECTIVE_TOKEN; if (gss_krb5_remove_padding(buf, blocksize)) - goto out; + return GSS_S_DEFECTIVE_TOKEN; - ret = GSS_S_COMPLETE; -out: - return ret; + return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index bdedf456bc1..41465072d0b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,140 +76,79 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmalloc(len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_KERNEL); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); - memcpy(res->data, p, len); return q; } -static inline const void * -get_key(const void *p, const void *end, struct crypto_blkcipher **res, - int *resalg) -{ - struct xdr_netobj key = { 0 }; - int setkey = 0; - char *alg_name; - - p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); - if (IS_ERR(p)) - goto out_err; - p = simple_get_netobj(p, end, &key); - if (IS_ERR(p)) - goto out_err; - - switch (*resalg) { - case NID_des_cbc: - alg_name = "cbc(des)"; - setkey = 1; - break; - case NID_cast5_cbc: - /* XXXX here in name only, not used */ - alg_name = "cbc(cast5)"; - setkey = 0; /* XXX will need to set to 1 */ - break; - case NID_md5: - if (key.len == 0) { - dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); - } - alg_name = "md5"; - setkey = 0; - break; - default: - dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); - goto out_err_free_key; - } - *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(*res)) { - printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); - *res = NULL; - goto out_err_free_key; - } - if (setkey) { - if (crypto_blkcipher_setkey(*res, key.data, key.len)) { - printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); - goto out_err_free_tfm; - } - } - - if(key.len > 0) - kfree(key.data); - return p; - -out_err_free_tfm: - crypto_free_blkcipher(*res); -out_err_free_key: - if(key.len > 0) - kfree(key.data); - p = ERR_PTR(-EINVAL); -out_err: - return p; -} - static int gss_import_sec_context_spkm3(const void *p, size_t len, struct gss_ctx *ctx_id) { const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; + int version; if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; + p = simple_get_bytes(p, end, &version, sizeof(version)); + if (IS_ERR(p)) + goto out_err_free_ctx; + if (version != 1) { + dprintk("RPC: unknown spkm3 token format: obsolete nfs-utils?\n"); + goto out_err_free_ctx; + } + p = simple_get_netobj(p, end, &ctx->ctx_id); if (IS_ERR(p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop)); + p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); if (IS_ERR(p)) goto out_err_free_ctx_id; p = simple_get_netobj(p, end, &ctx->mech_used); if (IS_ERR(p)) - goto out_err_free_mech; + goto out_err_free_ctx_id; p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); if (IS_ERR(p)) goto out_err_free_mech; - p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags)); + p = simple_get_netobj(p, end, &ctx->conf_alg); if (IS_ERR(p)) goto out_err_free_mech; - p = simple_get_netobj(p, end, &ctx->share_key); - if (IS_ERR(p)) - goto out_err_free_s_key; - - p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg); + p = simple_get_netobj(p, end, &ctx->derived_conf_key); if (IS_ERR(p)) - goto out_err_free_s_key; + goto out_err_free_conf_alg; - p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg); + p = simple_get_netobj(p, end, &ctx->intg_alg); if (IS_ERR(p)) - goto out_err_free_key1; + goto out_err_free_conf_key; - p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg)); + p = simple_get_netobj(p, end, &ctx->derived_integ_key); if (IS_ERR(p)) - goto out_err_free_key2; - - p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg)); - if (IS_ERR(p)) - goto out_err_free_key2; + goto out_err_free_intg_alg; if (p != end) - goto out_err_free_key2; + goto out_err_free_intg_key; ctx_id->internal_ctx_id = ctx; dprintk("Successfully imported new spkm context.\n"); return 0; -out_err_free_key2: - crypto_free_blkcipher(ctx->derived_integ_key); -out_err_free_key1: - crypto_free_blkcipher(ctx->derived_conf_key); -out_err_free_s_key: - kfree(ctx->share_key.data); +out_err_free_intg_key: + kfree(ctx->derived_integ_key.data); +out_err_free_intg_alg: + kfree(ctx->intg_alg.data); +out_err_free_conf_key: + kfree(ctx->derived_conf_key.data); +out_err_free_conf_alg: + kfree(ctx->conf_alg.data); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx_id: @@ -221,13 +160,16 @@ out_err: } static void -gss_delete_sec_context_spkm3(void *internal_ctx) { +gss_delete_sec_context_spkm3(void *internal_ctx) +{ struct spkm3_ctx *sctx = internal_ctx; - crypto_free_blkcipher(sctx->derived_integ_key); - crypto_free_blkcipher(sctx->derived_conf_key); - kfree(sctx->share_key.data); + kfree(sctx->derived_integ_key.data); + kfree(sctx->intg_alg.data); + kfree(sctx->derived_conf_key.data); + kfree(sctx->conf_alg.data); kfree(sctx->mech_used.data); + kfree(sctx->ctx_id.data); kfree(sctx); } @@ -239,7 +181,6 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, u32 maj_stat = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; - dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); @@ -254,10 +195,9 @@ gss_get_mic_spkm3(struct gss_ctx *ctx, u32 err = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; - dprintk("RPC: gss_get_mic_spkm3\n"); - err = spkm3_make_token(sctx, message_buffer, - message_token, SPKM_MIC_TOK); + message_token, SPKM_MIC_TOK); + dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err); return err; } diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 18c7862bc23..b179d58c624 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -39,11 +39,17 @@ #include <linux/sunrpc/gss_spkm3.h> #include <linux/random.h> #include <linux/crypto.h> +#include <linux/pagemap.h> +#include <linux/scatterlist.h> +#include <linux/sunrpc/xdr.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif +const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"}; +const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"}; + /* * spkm3_make_token() * @@ -66,29 +72,23 @@ spkm3_make_token(struct spkm3_ctx *ctx, int ctxelen = 0, ctxzbit = 0; int md5elen = 0, md5zbit = 0; - dprintk("RPC: spkm3_make_token\n"); - now = jiffies; if (ctx->ctx_id.len != 16) { dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", - ctx->ctx_id.len); + ctx->ctx_id.len); goto out_err; } - - switch (ctx->intg_alg) { - case NID_md5: - checksum_type = CKSUMTYPE_RSA_MD5; - break; - default: - dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not" - " supported\n", ctx->intg_alg); - goto out_err; - } - /* XXX since we don't support WRAP, perhaps we don't care... */ - if (ctx->conf_alg != NID_cast5_cbc) { - dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n", - ctx->conf_alg); + + if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { + dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm." + "only support hmac-md5 I-ALG.\n"); + goto out_err; + } else + checksum_type = CKSUMTYPE_HMAC_MD5; + + if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) { + dprintk("RPC: gss_spkm3_seal: unsupported C-ALG algorithm\n"); goto out_err; } @@ -96,10 +96,10 @@ spkm3_make_token(struct spkm3_ctx *ctx, /* Calculate checksum over the mic-header */ asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, - ctxelen, ctxzbit); - - if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, - text, 0, &md5cksum)) + ctxelen, ctxzbit); + if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key, + (char *)mic_hdr.data, mic_hdr.len, + text, 0, &md5cksum)) goto out_err; asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); @@ -121,7 +121,66 @@ spkm3_make_token(struct spkm3_ctx *ctx, return GSS_S_COMPLETE; out_err: + if (md5cksum.data) + kfree(md5cksum.data); + token->data = NULL; token->len = 0; return GSS_S_FAILURE; } + +static int +spkm3_checksummer(struct scatterlist *sg, void *data) +{ + struct hash_desc *desc = data; + + return crypto_hash_update(desc, sg, sg->length); +} + +/* checksum the plaintext data and hdrlen bytes of the token header */ +s32 +make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, + unsigned int hdrlen, struct xdr_buf *body, + unsigned int body_offset, struct xdr_netobj *cksum) +{ + char *cksumname; + struct hash_desc desc; /* XXX add to ctx? */ + struct scatterlist sg[1]; + int err; + + switch (cksumtype) { + case CKSUMTYPE_HMAC_MD5: + cksumname = "md5"; + break; + default: + dprintk("RPC: spkm3_make_checksum:" + " unsupported checksum %d", cksumtype); + return GSS_S_FAILURE; + } + + if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE; + + desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) + return GSS_S_FAILURE; + cksum->len = crypto_hash_digestsize(desc.tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_hash_setkey(desc.tfm, key->data, key->len); + if (err) + goto out; + + sg_set_buf(sg, header, hdrlen); + crypto_hash_update(&desc, sg, 1); + + xdr_process_buf(body, body_offset, body->len - body_offset, + spkm3_checksummer, &desc); + crypto_hash_final(&desc, cksum->data); + +out: + crypto_free_hash(desc.tfm); + + return err ? GSS_S_FAILURE : 0; +} + +EXPORT_SYMBOL(make_spkm3_checksum); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 854a983ccf2..35188b6ea8f 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -172,10 +172,10 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct *(u8 *)hptr++ = zbit; memcpy(hptr, ctxdata, elen); hptr += elen; - *hdrlen = hptr - top; + *hdrlen = hptr - top; } - -/* + +/* * spkm3_mic_innercontext_token() * * *tokp points to the beginning of the SPKM_MIC token described diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 8537f581ef9..e54581ca757 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -54,70 +54,70 @@ spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_buf *message_buffer, /* signbuf */ int toktype) { + s32 checksum_type; s32 code; struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; unsigned char *ptr = (unsigned char *)read_token->data; - unsigned char *cksum; + unsigned char *cksum; int bodysize, md5elen; int mic_hdrlen; u32 ret = GSS_S_DEFECTIVE_TOKEN; - dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len); - if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, &bodysize, &ptr, read_token->len)) goto out; /* decode the token */ - if (toktype == SPKM_MIC_TOK) { - - if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) - goto out; - - if (*cksum++ != 0x03) { - dprintk("RPC: spkm3_read_token BAD checksum type\n"); - goto out; - } - md5elen = *cksum++; - cksum++; /* move past the zbit */ - - if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) - goto out; - - /* HARD CODED FOR MD5 */ - - /* compute the checksum of the message. - * ptr + 2 = start of header piece of checksum - * mic_hdrlen + 2 = length of header piece of checksum - */ - ret = GSS_S_DEFECTIVE_TOKEN; - code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, - mic_hdrlen + 2, - message_buffer, 0, &md5cksum); - - if (code) - goto out; - - dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", - wire_cksum.len); - dprintk(" md5cksum.data\n"); - print_hexl((u32 *) md5cksum.data, 16, 0); - dprintk(" cksum.data:\n"); - print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0); - - ret = GSS_S_BAD_SIG; - code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); - if (code) - goto out; - - } else { - dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype); + if (toktype != SPKM_MIC_TOK) { + dprintk("RPC: BAD SPKM3 token type: %d\n", toktype); + goto out; + } + + if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) + goto out; + + if (*cksum++ != 0x03) { + dprintk("RPC: spkm3_read_token BAD checksum type\n"); + goto out; + } + md5elen = *cksum++; + cksum++; /* move past the zbit */ + + if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) + goto out; + + /* HARD CODED FOR MD5 */ + + /* compute the checksum of the message. + * ptr + 2 = start of header piece of checksum + * mic_hdrlen + 2 = length of header piece of checksum + */ + ret = GSS_S_DEFECTIVE_TOKEN; + if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { + dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm\n"); + goto out; + } + + checksum_type = CKSUMTYPE_HMAC_MD5; + + code = make_spkm3_checksum(checksum_type, + &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2, + message_buffer, 0, &md5cksum); + + if (code) + goto out; + + ret = GSS_S_BAD_SIG; + code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); + if (code) { + dprintk("RPC: bad MIC checksum\n"); goto out; } + /* XXX: need to add expiration and sequencing */ ret = GSS_S_COMPLETE; out: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1f0f079ffa6..066c64a97fd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -113,9 +113,7 @@ static int rsi_match(struct cache_head *a, struct cache_head *b) static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) { dst->len = len; - dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL); - if (dst->data) - memcpy(dst->data, src, len); + dst->data = (len ? kmemdup(src, len, GFP_KERNEL) : NULL); if (len && !dst->data) return -ENOMEM; return 0; @@ -756,10 +754,9 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) if (!new) goto out; kref_init(&new->h.ref); - new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); + new->h.name = kstrdup(name, GFP_KERNEL); if (!new->h.name) goto out_free_dom; - strcpy(new->h.name, name); new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; @@ -807,19 +804,19 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) - goto out; + return stat; if (integ_len > buf->len) - goto out; + return stat; if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) BUG(); /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) BUG(); if (mic.len > RPC_MAX_AUTH_SIZE) - goto out; + return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); if (!mic.data) - goto out; + return stat; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) goto out; maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); @@ -829,6 +826,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; stat = 0; out: + kfree(mic.data); return stat; } @@ -1068,7 +1066,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { case -EAGAIN: - goto drop; + case -ETIMEDOUT: case -ENOENT: goto drop; case 0: diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 00cb388ece0..14274490f92 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static int cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -185,6 +185,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); * * Returns 0 if the cache_head can be used, or cache_puts it and returns * -EAGAIN if upcall is pending, + * -ETIMEDOUT if upcall failed and should be retried, * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, @@ -236,7 +237,8 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) - cache_defer_req(rqstp, h); + if (cache_defer_req(rqstp, h) != 0) + rv = -ETIMEDOUT; if (rv) cache_put(h, detail); @@ -284,8 +286,8 @@ static struct file_operations cache_file_operations; static struct file_operations content_file_operations; static struct file_operations cache_flush_operations; -static void do_cache_clean(void *data); -static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL); +static void do_cache_clean(struct work_struct *work); +static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); void cache_register(struct cache_detail *cd) { @@ -337,7 +339,7 @@ void cache_register(struct cache_detail *cd) spin_unlock(&cache_list_lock); /* start the cleaning process */ - schedule_work(&cache_cleaner); + schedule_delayed_work(&cache_cleaner, 0); } int cache_unregister(struct cache_detail *cd) @@ -461,7 +463,7 @@ static int cache_clean(void) /* * We want to regularly clean the cache, so we need to schedule some work ... */ -static void do_cache_clean(void *data) +static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) @@ -523,14 +525,21 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; int hash = DFR_HASH(item); + if (cache_defer_cnt >= DFR_MAX) { + /* too much in the cache, randomly drop this one, + * or continue and drop the oldest below + */ + if (net_random()&1) + return -ETIMEDOUT; + } dreq = req->defer(req); if (dreq == NULL) - return; + return -ETIMEDOUT; dreq->item = item; dreq->recv_time = get_seconds(); @@ -546,17 +555,8 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* it is in, now maybe clean up */ dreq = NULL; if (++cache_defer_cnt > DFR_MAX) { - /* too much in the cache, randomly drop - * first or last - */ - if (net_random()&1) - dreq = list_entry(cache_defer_list.next, - struct cache_deferred_req, - recent); - else - dreq = list_entry(cache_defer_list.prev, - struct cache_deferred_req, - recent); + dreq = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); list_del(&dreq->recent); list_del(&dreq->hash); cache_defer_cnt--; @@ -571,6 +571,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* must have just been validated... */ cache_revisit_request(item); } + return 0; } static void cache_revisit_request(struct cache_head *item) @@ -670,7 +671,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; int err; if (count == 0) @@ -747,7 +748,7 @@ cache_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { int err; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; if (count == 0) return 0; @@ -778,7 +779,7 @@ cache_poll(struct file *filp, poll_table *wait) unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -1254,7 +1255,7 @@ static struct file_operations content_file_operations = { static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; int len; @@ -1275,7 +1276,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, static ssize_t write_flush(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 78696f2dc7d..aba528b9ae7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,6 +27,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/utsname.h> #include <linux/workqueue.h> @@ -141,6 +142,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); + err = -ENOMEM; + if (clnt->cl_metrics == NULL) + goto out_no_stats; + clnt->cl_program = program; if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -173,6 +178,8 @@ out_no_auth: rpc_put_mount(); } out_no_path: + rpc_free_iostats(clnt->cl_metrics); +out_no_stats: if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); kfree(clnt); @@ -252,13 +259,19 @@ struct rpc_clnt * rpc_clone_client(struct rpc_clnt *clnt) { struct rpc_clnt *new; + int err = -ENOMEM; - new = kmalloc(sizeof(*new), GFP_KERNEL); + new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - memcpy(new, clnt, sizeof(*new)); atomic_set(&new->cl_count, 1); atomic_set(&new->cl_users, 0); + new->cl_metrics = rpc_alloc_iostats(clnt); + if (new->cl_metrics == NULL) + goto out_no_stats; + err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); + if (err != 0) + goto out_no_path; new->cl_parent = clnt; atomic_inc(&clnt->cl_count); new->cl_xprt = xprt_get(clnt->cl_xprt); @@ -266,16 +279,17 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; - if (!IS_ERR(new->cl_dentry)) - dget(new->cl_dentry); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - new->cl_metrics = rpc_alloc_iostats(clnt); return new; +out_no_path: + rpc_free_iostats(new->cl_metrics); +out_no_stats: + kfree(new); out_no_clnt: - printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); + dprintk("RPC: %s returned error %d\n", __FUNCTION__, err); + return ERR_PTR(err); } /* @@ -328,16 +342,14 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } - if (clnt->cl_parent != clnt) { - if (!IS_ERR(clnt->cl_dentry)) - dput(clnt->cl_dentry); - rpc_destroy_client(clnt->cl_parent); - goto out_free; - } if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } + if (clnt->cl_parent != clnt) { + rpc_destroy_client(clnt->cl_parent); + goto out_free; + } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: @@ -467,10 +479,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); - status = -ENOMEM; task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); if (task == NULL) - goto out; + return -ENOMEM; /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ rpc_task_sigmask(task, &oldset); @@ -479,15 +490,17 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) /* Set up the call info struct and execute the task */ status = task->tk_status; - if (status == 0) { - atomic_inc(&task->tk_count); - status = rpc_execute(task); - if (status == 0) - status = task->tk_status; + if (status != 0) { + rpc_release_task(task); + goto out; } - rpc_restore_sigmask(&oldset); - rpc_release_task(task); + atomic_inc(&task->tk_count); + status = rpc_execute(task); + if (status == 0) + status = task->tk_status; + rpc_put_task(task); out: + rpc_restore_sigmask(&oldset); return status; } @@ -529,8 +542,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, rpc_restore_sigmask(&oldset); return status; out_release: - if (tk_ops->rpc_release != NULL) - tk_ops->rpc_release(data); + rpc_release_calldata(tk_ops, data); return status; } @@ -582,7 +594,11 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) { struct rpc_xprt *xprt = clnt->cl_xprt; - return xprt->ops->print_addr(xprt, format); + + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; } EXPORT_SYMBOL_GPL(rpc_peeraddr2str); @@ -812,8 +828,10 @@ call_encode(struct rpc_task *task) if (encode == NULL) return; + lock_kernel(); task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); + unlock_kernel(); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ rpc_delay(task, 3*HZ); @@ -1144,9 +1162,12 @@ call_decode(struct rpc_task *task) task->tk_action = rpc_exit_task; - if (decode) + if (decode) { + lock_kernel(); task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, task->tk_msg.rpc_resp); + unlock_kernel(); + } dprintk("RPC: %4d call_decode result %d\n", task->tk_pid, task->tk_status); return; diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index e52afab413d..3946ec3eb51 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -101,14 +101,14 @@ void rpc_getport(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + status = -EACCES; /* tell caller to check again */ + if (xprt_test_and_set_binding(xprt)) + goto bailout_nowake; + /* Put self on queue before sending rpcbind request, in case * pmap_getport_done completes before we return from rpc_run_task */ rpc_sleep_on(&xprt->binding, task, NULL, NULL); - status = -EACCES; /* tell caller to check again */ - if (xprt_test_and_set_binding(xprt)) - goto bailout_nofree; - /* Someone else may have bound if we slept */ status = 0; if (xprt_bound(xprt)) @@ -134,7 +134,7 @@ void rpc_getport(struct rpc_task *task) child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); if (IS_ERR(child)) goto bailout; - rpc_release_task(child); + rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; @@ -143,8 +143,9 @@ bailout: pmap_map_free(map); xprt_put(xprt); bailout_nofree: - task->tk_status = status; pmap_wake_portmap_waiters(xprt, status); +bailout_nowake: + task->tk_status = status; } #ifdef CONFIG_ROOT_NFS diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9a0b41a97f9..89273d35e0c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -33,7 +33,7 @@ static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; -static kmem_cache_t *rpc_inode_cachep __read_mostly; +static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) @@ -54,10 +54,11 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, } static void -rpc_timeout_upcall_queue(void *data) +rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); - struct rpc_inode *rpci = (struct rpc_inode *)data; + struct rpc_inode *rpci = + container_of(work, struct rpc_inode, queue_timeout.work); struct inode *inode = &rpci->vfs_inode; void (*destroy_msg)(struct rpc_pipe_msg *); @@ -142,7 +143,7 @@ static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; - rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL); + rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; @@ -213,7 +214,7 @@ out: static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; int res = 0; @@ -256,7 +257,7 @@ out_unlock: static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct rpc_inode *rpci = RPC_I(inode); int res; @@ -274,7 +275,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) struct rpc_inode *rpci; unsigned int mask = 0; - rpci = RPC_I(filp->f_dentry->d_inode); + rpci = RPC_I(filp->f_path.dentry->d_inode); poll_wait(filp, &rpci->waitq, wait); mask = POLLOUT | POLLWRNORM; @@ -289,7 +290,7 @@ static int rpc_pipe_ioctl(struct inode *ino, struct file *filp, unsigned int cmd, unsigned long arg) { - struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); int len; switch (cmd) { @@ -823,7 +824,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct rpc_inode *rpci = (struct rpc_inode *) foo; @@ -837,7 +838,8 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); - INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci); + INIT_DELAYED_WORK(&rpci->queue_timeout, + rpc_timeout_upcall_queue); rpci->ops = NULL; } } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a1ab4eed41f..79bc4cdf5d4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -34,14 +34,14 @@ static int rpc_task_id; #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) -static kmem_cache_t *rpc_task_slabp __read_mostly; -static kmem_cache_t *rpc_buffer_slabp __read_mostly; +static struct kmem_cache *rpc_task_slabp __read_mostly; +static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_async_schedule(void *); +static void rpc_async_schedule(struct work_struct *); /* * RPC tasks sit here while waiting for conditions to improve. @@ -266,12 +266,28 @@ static int rpc_wait_bit_interruptible(void *word) return 0; } +static void rpc_set_active(struct rpc_task *task) +{ + if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) + return; + spin_lock(&rpc_sched_lock); +#ifdef RPC_DEBUG + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = rpc_task_id++; +#endif + /* Add to global list of all tasks */ + list_add_tail(&task->tk_task, &all_tasks); + spin_unlock(&rpc_sched_lock); +} + /* * Mark an RPC call as having completed by clearing the 'active' bit */ -static inline void rpc_mark_complete_task(struct rpc_task *task) +static void rpc_mark_complete_task(struct rpc_task *task) { - rpc_clear_active(task); + smp_mb__before_clear_bit(); + clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); + smp_mb__after_clear_bit(); wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); } @@ -295,17 +311,19 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - int do_ret; - BUG_ON(task->tk_timeout_fn); - do_ret = rpc_test_and_set_running(task); rpc_clear_queued(task); - if (do_ret) + if (rpc_test_and_set_running(task)) return; + /* We might have raced */ + if (RPC_IS_QUEUED(task)) { + rpc_clear_running(task); + return; + } if (RPC_IS_ASYNC(task)) { int status; - INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); + INIT_WORK(&task->u.tk_work, rpc_async_schedule); status = queue_work(task->tk_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); @@ -333,9 +351,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, return; } - /* Mark the task as being activated if so needed */ - rpc_set_active(task); - __rpc_add_wait_queue(q, task); BUG_ON(task->tk_callback != NULL); @@ -346,6 +361,9 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { + /* Mark the task as being activated if so needed */ + rpc_set_active(task); + /* * Protect the queue operations. */ @@ -409,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task) */ void rpc_wake_up_task(struct rpc_task *task) { + rcu_read_lock_bh(); if (rpc_start_wakeup(task)) { if (RPC_IS_QUEUED(task)) { struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - spin_lock_bh(&queue->lock); + /* Note: we're already in a bh-safe context */ + spin_lock(&queue->lock); __rpc_do_wake_up_task(task); - spin_unlock_bh(&queue->lock); + spin_unlock(&queue->lock); } rpc_finish_wakeup(task); } + rcu_read_unlock_bh(); } /* @@ -481,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) struct rpc_task *task = NULL; dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - spin_lock_bh(&queue->lock); + rcu_read_lock_bh(); + spin_lock(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) __rpc_wake_up_task(task); } - spin_unlock_bh(&queue->lock); + spin_unlock(&queue->lock); + rcu_read_unlock_bh(); return task; } @@ -504,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - spin_lock_bh(&queue->lock); + rcu_read_lock_bh(); + spin_lock(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) @@ -513,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue) break; head--; } - spin_unlock_bh(&queue->lock); + spin_unlock(&queue->lock); + rcu_read_unlock_bh(); } /** @@ -528,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - spin_lock_bh(&queue->lock); + rcu_read_lock_bh(); + spin_lock(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { @@ -539,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) break; head--; } - spin_unlock_bh(&queue->lock); + spin_unlock(&queue->lock); + rcu_read_unlock_bh(); } static void __rpc_atrun(struct rpc_task *task) @@ -561,7 +588,9 @@ void rpc_delay(struct rpc_task *task, unsigned long delay) */ static void rpc_prepare_task(struct rpc_task *task) { + lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); + unlock_kernel(); } /* @@ -571,7 +600,9 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { + lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); + unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -581,6 +612,15 @@ void rpc_exit_task(struct rpc_task *task) } EXPORT_SYMBOL(rpc_exit_task); +void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) +{ + if (ops->rpc_release != NULL) { + lock_kernel(); + ops->rpc_release(calldata); + unlock_kernel(); + } +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -615,9 +655,7 @@ static int __rpc_execute(struct rpc_task *task) */ save_callback=task->tk_callback; task->tk_callback=NULL; - lock_kernel(); save_callback(task); - unlock_kernel(); } /* @@ -628,9 +666,7 @@ static int __rpc_execute(struct rpc_task *task) if (!RPC_IS_QUEUED(task)) { if (task->tk_action == NULL) break; - lock_kernel(); task->tk_action(task); - unlock_kernel(); } /* @@ -671,8 +707,6 @@ static int __rpc_execute(struct rpc_task *task) } dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status); - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); /* Release all resources associated with the task */ rpc_release_task(task); return status; @@ -695,9 +729,9 @@ rpc_execute(struct rpc_task *task) return __rpc_execute(task); } -static void rpc_async_schedule(void *arg) +static void rpc_async_schedule(struct work_struct *work) { - __rpc_execute((struct rpc_task *)arg); + __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); } /** @@ -786,15 +820,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_flags |= RPC_TASK_NOINTR; } -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif - /* Add to global list of all tasks */ - spin_lock(&rpc_sched_lock); - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); - BUG_ON(task->tk_ops == NULL); /* starting timestamp */ @@ -810,8 +835,9 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rpc_task *task) +static void rpc_free_task(struct rcu_head *rcu) { + struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); dprintk("RPC: %4d freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } @@ -847,16 +873,34 @@ cleanup: goto out; } -void rpc_release_task(struct rpc_task *task) + +void rpc_put_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; + if (!atomic_dec_and_test(&task->tk_count)) + return; + /* Release resources */ + if (task->tk_rqstp) + xprt_release(task); + if (task->tk_msg.rpc_cred) + rpcauth_unbindcred(task); + if (task->tk_client) { + rpc_release_client(task->tk_client); + task->tk_client = NULL; + } + if (task->tk_flags & RPC_TASK_DYNAMIC) + call_rcu_bh(&task->u.tk_rcu, rpc_free_task); + rpc_release_calldata(tk_ops, calldata); +} +EXPORT_SYMBOL(rpc_put_task); + +void rpc_release_task(struct rpc_task *task) +{ #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); #endif - if (!atomic_dec_and_test(&task->tk_count)) - return; dprintk("RPC: %4d release task\n", task->tk_pid); /* Remove from global task list */ @@ -869,23 +913,13 @@ void rpc_release_task(struct rpc_task *task) /* Synchronously delete any running timer */ rpc_delete_timer(task); - /* Release resources */ - if (task->tk_rqstp) - xprt_release(task); - if (task->tk_msg.rpc_cred) - rpcauth_unbindcred(task); - if (task->tk_client) { - rpc_release_client(task->tk_client); - task->tk_client = NULL; - } - #ifdef RPC_DEBUG task->tk_magic = 0; #endif - if (task->tk_flags & RPC_TASK_DYNAMIC) - rpc_free_task(task); - if (tk_ops->rpc_release) - tk_ops->rpc_release(calldata); + /* Wake up anyone who is waiting for task completion */ + rpc_mark_complete_task(task); + + rpc_put_task(task); } /** @@ -902,8 +936,7 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, struct rpc_task *task; task = rpc_new_task(clnt, flags, ops, data); if (task == NULL) { - if (ops->rpc_release != NULL) - ops->rpc_release(data); + rpc_release_calldata(ops, data); return ERR_PTR(-ENOMEM); } atomic_inc(&task->tk_count); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 6f17527b9e6..634885b0c04 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -16,7 +16,7 @@ /** - * skb_read_bits - copy some data bits from skb to internal buffer + * xdr_skb_read_bits - copy some data bits from skb to internal buffer * @desc: sk_buff copy helper * @to: copy destination * @len: number of bytes to copy @@ -24,11 +24,11 @@ * Possibly called several times to iterate over an sk_buff and copy * data out of it. */ -static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) +size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) { if (len > desc->count) len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, to, len)) + if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len))) return 0; desc->count -= len; desc->offset += len; @@ -36,16 +36,17 @@ static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) } /** - * skb_read_and_csum_bits - copy and checksum from skb to buffer + * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer * @desc: sk_buff copy helper * @to: copy destination * @len: number of bytes to copy * * Same as skb_read_bits, but calculate a checksum at the same time. */ -static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) +static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len) { - unsigned int csum2, pos; + unsigned int pos; + __wsum csum2; if (len > desc->count) len = desc->count; @@ -65,7 +66,7 @@ static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) * @copy_actor: virtual method for copying data * */ -ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) +ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor) { struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; @@ -147,7 +148,7 @@ out: */ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) { - skb_reader_t desc; + struct xdr_skb_reader desc; desc.skb = skb; desc.offset = sizeof(struct udphdr); @@ -157,22 +158,22 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0) return -1; if (desc.offset != skb->len) { - unsigned int csum2; + __wsum csum2; csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); desc.csum = csum_block_add(desc.csum, csum2, desc.offset); } if (desc.count) return -1; - if ((unsigned short)csum_fold(desc.csum)) + if (csum_fold(desc.csum)) return -1; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) return -1; if (desc.count) return -1; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 192dff5dabc..d85fddeb638 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -33,7 +33,6 @@ EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); -EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); @@ -139,6 +138,8 @@ EXPORT_SYMBOL(nlm_debug); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); extern struct cache_detail ip_map_cache; +extern int init_socket_xprt(void); +extern void cleanup_socket_xprt(void); static int __init init_sunrpc(void) @@ -156,6 +157,7 @@ init_sunrpc(void) rpc_proc_init(); #endif cache_register(&ip_map_cache); + init_socket_xprt(); out: return err; } @@ -163,6 +165,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); if (cache_unregister(&ip_map_cache)) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index eb44ec929ca..f3001f3626f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -308,7 +308,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_nrpools = npools; serv->sv_pools = - kcalloc(sizeof(struct svc_pool), serv->sv_nrpools, + kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), GFP_KERNEL); if (!serv->sv_pools) { kfree(serv); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 8f2320aded5..c7bb5f7f21a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -119,13 +119,15 @@ EXPORT_SYMBOL(svc_auth_unregister); #define DN_HASHMASK (DN_HASHMAX-1) static struct hlist_head auth_domain_table[DN_HASHMAX]; -static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t auth_domain_lock = + __SPIN_LOCK_UNLOCKED(auth_domain_lock); void auth_domain_put(struct auth_domain *dom) { if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) { hlist_del(&dom->hash); dom->flavour->domain_release(dom); + spin_unlock(&auth_domain_lock); } } @@ -147,10 +149,8 @@ auth_domain_lookup(char *name, struct auth_domain *new) return hp; } } - if (new) { + if (new) hlist_add_head(&new->hash, head); - kref_get(&new->ref); - } spin_unlock(&auth_domain_lock); return new; } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index e1bd933629f..0d1e8fb83b9 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -53,6 +53,10 @@ struct auth_domain *unix_domain_find(char *name) return NULL; kref_init(&new->h.ref); new->h.name = kstrdup(name, GFP_KERNEL); + if (new->h.name == NULL) { + kfree(new); + return NULL; + } new->h.flavour = &svcauth_unix; new->addr_changes = 0; rv = auth_domain_lookup(name, &new->h); @@ -101,9 +105,9 @@ static void ip_map_put(struct kref *kref) * IP addresses in reverse-endian (i.e. on a little-endian machine). * So use a trivial but reliable hash instead */ -static inline int hash_ip(unsigned long ip) +static inline int hash_ip(__be32 ip) { - int hash = ip ^ (ip>>16); + int hash = (__force u32)ip ^ ((__force u32)ip>>16); return (hash ^ (hash>>8)) & 0xff; } #endif @@ -284,7 +288,7 @@ static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) ip.m_addr = addr; ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip((unsigned long)addr.s_addr)); + hash_ip(addr.s_addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -313,7 +317,7 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ch = sunrpc_cache_update(&ip_map_cache, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip((unsigned long)ipm->m_addr.s_addr)); + hash_ip(ipm->m_addr.s_addr)); if (!ch) return -ENOMEM; cache_put(ch, &ip_map_cache); @@ -435,6 +439,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) default: BUG(); case -EAGAIN: + case -ETIMEDOUT: return SVC_DROP; case -ENOENT: return SVC_DENIED; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 96521f16342..99f54fb6d66 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -32,6 +32,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/file.h> +#include <linux/freezer.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -84,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req); */ static int svc_conn_age_period = 6*60; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key svc_key[2]; +static struct lock_class_key svc_slock_key[2]; + +static inline void svc_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", + &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", + &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void svc_reclassify_socket(struct socket *sock) +{ +} +#endif + /* * Queue up an idle server thread. Must have pool->sp_lock held. * Note: this is really a stack rather than a queue, so that we only @@ -299,9 +329,15 @@ void svc_reserve(struct svc_rqst *rqstp, int space) static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse) && + test_bit(SK_DEAD, &svsk->sk_flags)) { dprintk("svc: releasing dead socket\n"); - sock_release(svsk->sk_sock); + if (svsk->sk_sock->file) + sockfd_put(svsk->sk_sock); + else + sock_release(svsk->sk_sock); + if (svsk->sk_info_authunix != NULL) + svcauth_unix_info_release(svsk->sk_info_authunix); kfree(svsk); } } @@ -1550,6 +1586,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) return error; + svc_reclassify_socket(sock); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, (struct sockaddr *) sin, @@ -1604,20 +1642,13 @@ svc_delete_socket(struct svc_sock *svsk) if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; - if (!atomic_read(&svsk->sk_inuse)) { - spin_unlock_bh(&serv->sv_lock); - if (svsk->sk_sock->file) - sockfd_put(svsk->sk_sock); - else - sock_release(svsk->sk_sock); - if (svsk->sk_info_authunix != NULL) - svcauth_unix_info_release(svsk->sk_info_authunix); - kfree(svsk); - } else { - spin_unlock_bh(&serv->sv_lock); - dprintk(KERN_NOTICE "svc: server socket destroy delayed\n"); - /* svsk->sk_server = NULL; */ - } + /* This atomic_inc should be needed - svc_delete_socket + * should have the semantic of dropping a reference. + * But it doesn't yet.... + */ + atomic_inc(&svsk->sk_inuse); + spin_unlock_bh(&serv->sv_lock); + svc_sock_put(svsk); } /* diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index d89b048ad6b..82b27528d0c 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -18,7 +18,6 @@ #include <linux/sunrpc/types.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/stats.h> -#include <linux/sunrpc/xprt.h> /* * Declare the debug flags here @@ -119,11 +118,6 @@ done: } -static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; -static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; -static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; -static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; - static ctl_table debug_table[] = { { .ctl_name = CTL_RPCDEBUG, @@ -157,50 +151,6 @@ static ctl_table debug_table[] = { .mode = 0644, .proc_handler = &proc_dodebug }, - { - .ctl_name = CTL_SLOTTABLE_UDP, - .procname = "udp_slot_table_entries", - .data = &xprt_udp_slot_table_entries, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_slot_table_size, - .extra2 = &max_slot_table_size - }, - { - .ctl_name = CTL_SLOTTABLE_TCP, - .procname = "tcp_slot_table_entries", - .data = &xprt_tcp_slot_table_entries, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_slot_table_size, - .extra2 = &max_slot_table_size - }, - { - .ctl_name = CTL_MIN_RESVPORT, - .procname = "min_resvport", - .data = &xprt_min_resvport, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport_limit - }, - { - .ctl_name = CTL_MAX_RESVPORT, - .procname = "max_resvport", - .data = &xprt_max_resvport, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport_limit - }, { .ctl_name = 0 } }; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 9022eb8b37e..a0af250ca31 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -640,41 +640,30 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) buf->buflen = buf->len = iov->iov_len; } -/* Sets subiov to the intersection of iov with the buffer of length len - * starting base bytes after iov. Indicates empty intersection by setting - * length of subiov to zero. Decrements len by length of subiov, sets base - * to zero (or decrements it by length of iov if subiov is empty). */ -static void -iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len) -{ - if (*base > iov->iov_len) { - subiov->iov_base = NULL; - subiov->iov_len = 0; - *base -= iov->iov_len; - } else { - subiov->iov_base = iov->iov_base + *base; - subiov->iov_len = min(*len, (int)iov->iov_len - *base); - *base = 0; - } - *len -= subiov->iov_len; -} - /* Sets subbuf to the portion of buf of length len beginning base bytes * from the start of buf. Returns -1 if base of length are out of bounds. */ int xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, - int base, int len) + unsigned int base, unsigned int len) { - int i; - subbuf->buflen = subbuf->len = len; - iov_subsegment(buf->head, subbuf->head, &base, &len); + if (base < buf->head[0].iov_len) { + subbuf->head[0].iov_base = buf->head[0].iov_base + base; + subbuf->head[0].iov_len = min_t(unsigned int, len, + buf->head[0].iov_len - base); + len -= subbuf->head[0].iov_len; + base = 0; + } else { + subbuf->head[0].iov_base = NULL; + subbuf->head[0].iov_len = 0; + base -= buf->head[0].iov_len; + } if (base < buf->page_len) { - i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; - subbuf->pages = &buf->pages[i]; - subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; - subbuf->page_len = min((int)buf->page_len - base, len); + subbuf->page_len = min(buf->page_len - base, len); + base += buf->page_base; + subbuf->page_base = base & ~PAGE_CACHE_MASK; + subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT]; len -= subbuf->page_len; base = 0; } else { @@ -682,66 +671,85 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, subbuf->page_len = 0; } - iov_subsegment(buf->tail, subbuf->tail, &base, &len); + if (base < buf->tail[0].iov_len) { + subbuf->tail[0].iov_base = buf->tail[0].iov_base + base; + subbuf->tail[0].iov_len = min_t(unsigned int, len, + buf->tail[0].iov_len - base); + len -= subbuf->tail[0].iov_len; + base = 0; + } else { + subbuf->tail[0].iov_base = NULL; + subbuf->tail[0].iov_len = 0; + base -= buf->tail[0].iov_len; + } + if (base || len) return -1; return 0; } -/* obj is assumed to point to allocated memory of size at least len: */ -int -read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { - struct xdr_buf subbuf; - int this_len; - int status; + unsigned int this_len; - status = xdr_buf_subsegment(buf, &subbuf, base, len); - if (status) - goto out; - this_len = min(len, (int)subbuf.head[0].iov_len); - memcpy(obj, subbuf.head[0].iov_base, this_len); + this_len = min_t(unsigned int, len, subbuf->head[0].iov_len); + memcpy(obj, subbuf->head[0].iov_base, this_len); len -= this_len; obj += this_len; - this_len = min(len, (int)subbuf.page_len); + this_len = min_t(unsigned int, len, subbuf->page_len); if (this_len) - _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); + _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len); len -= this_len; obj += this_len; - this_len = min(len, (int)subbuf.tail[0].iov_len); - memcpy(obj, subbuf.tail[0].iov_base, this_len); -out: - return status; + this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len); + memcpy(obj, subbuf->tail[0].iov_base, this_len); } /* obj is assumed to point to allocated memory of size at least len: */ -int -write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len) { struct xdr_buf subbuf; - int this_len; int status; status = xdr_buf_subsegment(buf, &subbuf, base, len); - if (status) - goto out; - this_len = min(len, (int)subbuf.head[0].iov_len); - memcpy(subbuf.head[0].iov_base, obj, this_len); + if (status != 0) + return status; + __read_bytes_from_xdr_buf(&subbuf, obj, len); + return 0; +} + +static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) +{ + unsigned int this_len; + + this_len = min_t(unsigned int, len, subbuf->head[0].iov_len); + memcpy(subbuf->head[0].iov_base, obj, this_len); len -= this_len; obj += this_len; - this_len = min(len, (int)subbuf.page_len); + this_len = min_t(unsigned int, len, subbuf->page_len); if (this_len) - _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); + _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len); len -= this_len; obj += this_len; - this_len = min(len, (int)subbuf.tail[0].iov_len); - memcpy(subbuf.tail[0].iov_base, obj, this_len); -out: - return status; + this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len); + memcpy(subbuf->tail[0].iov_base, obj, this_len); +} + +/* obj is assumed to point to allocated memory of size at least len: */ +int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len) +{ + struct xdr_buf subbuf; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status != 0) + return status; + __write_bytes_to_xdr_buf(&subbuf, obj, len); + return 0; } int -xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) +xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) { __be32 raw; int status; @@ -754,7 +762,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) } int -xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) +xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { __be32 raw = htonl(obj); @@ -765,44 +773,37 @@ xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) * entirely in the head or the tail, set object to point to it; otherwise * try to find space for it at the end of the tail, copy it there, and * set obj to point to it. */ -int -xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) +int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset) { - u32 tail_offset = buf->head[0].iov_len + buf->page_len; - u32 obj_end_offset; + struct xdr_buf subbuf; if (xdr_decode_word(buf, offset, &obj->len)) - goto out; - obj_end_offset = offset + 4 + obj->len; - - if (obj_end_offset <= buf->head[0].iov_len) { - /* The obj is contained entirely in the head: */ - obj->data = buf->head[0].iov_base + offset + 4; - } else if (offset + 4 >= tail_offset) { - if (obj_end_offset - tail_offset - > buf->tail[0].iov_len) - goto out; - /* The obj is contained entirely in the tail: */ - obj->data = buf->tail[0].iov_base - + offset - tail_offset + 4; - } else { - /* use end of tail as storage for obj: - * (We don't copy to the beginning because then we'd have - * to worry about doing a potentially overlapping copy. - * This assumes the object is at most half the length of the - * tail.) */ - if (obj->len > buf->tail[0].iov_len) - goto out; - obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - - obj->len; - if (read_bytes_from_xdr_buf(buf, offset + 4, - obj->data, obj->len)) - goto out; + return -EFAULT; + if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len)) + return -EFAULT; - } + /* Is the obj contained entirely in the head? */ + obj->data = subbuf.head[0].iov_base; + if (subbuf.head[0].iov_len == obj->len) + return 0; + /* ..or is the obj contained entirely in the tail? */ + obj->data = subbuf.tail[0].iov_base; + if (subbuf.tail[0].iov_len == obj->len) + return 0; + + /* use end of tail as storage for obj: + * (We don't copy to the beginning because then we'd have + * to worry about doing a potentially overlapping copy. + * This assumes the object is at most half the length of the + * tail.) */ + if (obj->len > buf->buflen - buf->len) + return -ENOMEM; + if (buf->tail[0].iov_len != 0) + obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len; + else + obj->data = buf->head[0].iov_base + buf->head[0].iov_len; + __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len); return 0; -out: - return -1; } /* Returns 0 on success, or else a negative error code. */ @@ -1020,3 +1021,71 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base, return xdr_xcode_array2(buf, base, desc, 1); } + +int +xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, + int (*actor)(struct scatterlist *, void *), void *data) +{ + int i, ret = 0; + unsigned page_len, thislen, page_offset; + struct scatterlist sg[1]; + + if (offset >= buf->head[0].iov_len) { + offset -= buf->head[0].iov_len; + } else { + thislen = buf->head[0].iov_len - offset; + if (thislen > len) + thislen = len; + sg_set_buf(sg, buf->head[0].iov_base + offset, thislen); + ret = actor(sg, data); + if (ret) + goto out; + offset = 0; + len -= thislen; + } + if (len == 0) + goto out; + + if (offset >= buf->page_len) { + offset -= buf->page_len; + } else { + page_len = buf->page_len - offset; + if (page_len > len) + page_len = len; + len -= page_len; + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; + thislen = PAGE_CACHE_SIZE - page_offset; + do { + if (thislen > page_len) + thislen = page_len; + sg->page = buf->pages[i]; + sg->offset = page_offset; + sg->length = thislen; + ret = actor(sg, data); + if (ret) + goto out; + page_len -= thislen; + i++; + page_offset = 0; + thislen = PAGE_CACHE_SIZE; + } while (page_len != 0); + offset = 0; + } + if (len == 0) + goto out; + if (offset < buf->tail[0].iov_len) { + thislen = buf->tail[0].iov_len - offset; + if (thislen > len) + thislen = len; + sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen); + ret = actor(sg, data); + len -= thislen; + } + if (len != 0) + ret = -EINVAL; +out: + return ret; +} +EXPORT_SYMBOL(xdr_process_buf); + diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 80857470dc1..7a3999f0a4a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -459,7 +459,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req) if (to->to_maxval && req->rq_timeout >= to->to_maxval) req->rq_timeout = to->to_maxval; req->rq_retries++; - pprintk("RPC: %lu retrans\n", jiffies); } else { req->rq_timeout = to->to_initval; req->rq_retries = 0; @@ -468,7 +467,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req) spin_lock_bh(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); spin_unlock_bh(&xprt->transport_lock); - pprintk("RPC: %lu timeout\n", jiffies); status = -ETIMEDOUT; } @@ -479,9 +477,10 @@ int xprt_adjust_timeout(struct rpc_rqst *req) return status; } -static void xprt_autoclose(void *args) +static void xprt_autoclose(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, task_cleanup); xprt_disconnect(xprt); xprt->ops->close(xprt); @@ -891,39 +890,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i */ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) { - int result; struct rpc_xprt *xprt; struct rpc_rqst *req; - if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { - dprintk("RPC: xprt_create_transport: no memory\n"); - return ERR_PTR(-ENOMEM); - } - if (size <= sizeof(xprt->addr)) { - memcpy(&xprt->addr, ap, size); - xprt->addrlen = size; - } else { - kfree(xprt); - dprintk("RPC: xprt_create_transport: address too large\n"); - return ERR_PTR(-EBADF); - } - switch (proto) { case IPPROTO_UDP: - result = xs_setup_udp(xprt, to); + xprt = xs_setup_udp(ap, size, to); break; case IPPROTO_TCP: - result = xs_setup_tcp(xprt, to); + xprt = xs_setup_tcp(ap, size, to); break; default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", proto); return ERR_PTR(-EIO); } - if (result) { - kfree(xprt); - dprintk("RPC: xprt_create_transport: failed, %d\n", result); - return ERR_PTR(result); + if (IS_ERR(xprt)) { + dprintk("RPC: xprt_create_transport: failed, %ld\n", + -PTR_ERR(xprt)); + return xprt; } kref_init(&xprt->kref); @@ -932,7 +917,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; @@ -969,8 +954,11 @@ static void xprt_destroy(struct kref *kref) dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); + + /* + * Tear down transport state and free the rpc_xprt + */ xprt->ops->destroy(xprt); - kfree(xprt); } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 757fc91ef25..49cabffd7fd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -46,6 +46,92 @@ unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; /* + * We can register our own files under /proc/sys/sunrpc by + * calling register_sysctl_table() again. The files in that + * directory become the union of all files registered there. + * + * We simply need to make sure that we don't collide with + * someone else's file names! + */ + +#ifdef RPC_DEBUG + +static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; +static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; + +static struct ctl_table_header *sunrpc_table_header; + +/* + * FIXME: changing the UDP slot table size should also resize the UDP + * socket buffers for existing UDP transports + */ +static ctl_table xs_tunables_table[] = { + { + .ctl_name = CTL_SLOTTABLE_UDP, + .procname = "udp_slot_table_entries", + .data = &xprt_udp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_SLOTTABLE_TCP, + .procname = "tcp_slot_table_entries", + .data = &xprt_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_MIN_RESVPORT, + .procname = "min_resvport", + .data = &xprt_min_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, + { + .ctl_name = CTL_MAX_RESVPORT, + .procname = "max_resvport", + .data = &xprt_max_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, + { + .ctl_name = 0, + }, +}; + +static ctl_table sunrpc_table[] = { + { + .ctl_name = CTL_SUNRPC, + .procname = "sunrpc", + .mode = 0555, + .child = xs_tunables_table + }, + { + .ctl_name = 0, + }, +}; + +#endif + +/* * How many times to try sending a request on a socket before waiting * for the socket buffer to clear. */ @@ -125,6 +211,55 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif +struct sock_xprt { + struct rpc_xprt xprt; + + /* + * Network layer + */ + struct socket * sock; + struct sock * inet; + + /* + * State of TCP reply receive + */ + __be32 tcp_fraghdr, + tcp_xid; + + u32 tcp_offset, + tcp_reclen; + + unsigned long tcp_copied, + tcp_flags; + + /* + * Connection of transports + */ + struct delayed_work connect_worker; + unsigned short port; + + /* + * UDP socket buffer size parameters + */ + size_t rcvsize, + sndsize; + + /* + * Saved socket callback addresses + */ + void (*old_data_ready)(struct sock *, int); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); +}; + +/* + * TCP receive state flags + */ +#define TCP_RCV_LAST_FRAG (1UL << 0) +#define TCP_RCV_COPY_FRAGHDR (1UL << 1) +#define TCP_RCV_COPY_XID (1UL << 2) +#define TCP_RCV_COPY_DATA (1UL << 3) + static void xs_format_peer_addresses(struct rpc_xprt *xprt) { struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; @@ -168,37 +303,52 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt) #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) -static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) +static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) { - struct kvec iov = { - .iov_base = xdr->head[0].iov_base + base, - .iov_len = len - base, - }; struct msghdr msg = { .msg_name = addr, .msg_namelen = addrlen, - .msg_flags = XS_SENDMSG_FLAGS, + .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), + }; + struct kvec iov = { + .iov_base = vec->iov_base + base, + .iov_len = vec->iov_len - base, }; - if (xdr->len > len) - msg.msg_flags |= MSG_MORE; - - if (likely(iov.iov_len)) + if (iov.iov_len != 0) return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); return kernel_sendmsg(sock, &msg, NULL, 0, 0); } -static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) +static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more) { - struct kvec iov = { - .iov_base = xdr->tail[0].iov_base + base, - .iov_len = len - base, - }; - struct msghdr msg = { - .msg_flags = XS_SENDMSG_FLAGS, - }; + struct page **ppage; + unsigned int remainder; + int err, sent = 0; + + remainder = xdr->page_len - base; + base += xdr->page_base; + ppage = xdr->pages + (base >> PAGE_SHIFT); + base &= ~PAGE_MASK; + for(;;) { + unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); + int flags = XS_SENDMSG_FLAGS; - return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + remainder -= len; + if (remainder != 0 || more) + flags |= MSG_MORE; + err = sock->ops->sendpage(sock, *ppage, base, len, flags); + if (remainder == 0 || err != len) + break; + sent += err; + ppage++; + base = 0; + } + if (sent == 0) + return err; + if (err > 0) + sent += err; + return sent; } /** @@ -210,76 +360,51 @@ static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int b * @base: starting position in the buffer * */ -static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) +static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) { - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - int err, ret = 0; + unsigned int remainder = xdr->len - base; + int err, sent = 0; if (unlikely(!sock)) return -ENOTCONN; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + if (base != 0) { + addr = NULL; + addrlen = 0; + } - len = xdr->head[0].iov_len; - if (base < len || (addr != NULL && base == 0)) { - err = xs_send_head(sock, addr, addrlen, xdr, base, len); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != (len - base)) + if (base < xdr->head[0].iov_len || addr != NULL) { + unsigned int len = xdr->head[0].iov_len - base; + remainder -= len; + err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); + if (remainder == 0 || err != len) goto out; + sent += err; base = 0; } else - base -= len; + base -= xdr->head[0].iov_len; - if (unlikely(pglen == 0)) - goto copy_tail; - if (unlikely(base >= pglen)) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - - do { - int flags = XS_SENDMSG_FLAGS; - - len = PAGE_CACHE_SIZE; - if (base) - len -= base; - if (pglen < len) - len = pglen; - - if (pglen != len || xdr->tail[0].iov_len != 0) - flags |= MSG_MORE; - - err = kernel_sendpage(sock, *ppage, base, len, flags); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != len) + if (base < xdr->page_len) { + unsigned int len = xdr->page_len - base; + remainder -= len; + err = xs_send_pagedata(sock, xdr, base, remainder != 0); + if (remainder == 0 || err != len) goto out; + sent += err; base = 0; - ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) { - err = xs_send_tail(sock, xdr, base, len); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - } + } else + base -= xdr->page_len; + + if (base >= xdr->tail[0].iov_len) + return sent; + err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); out: - return ret; + if (sent == 0) + return err; + if (err > 0) + sent += err; + return sent; } /** @@ -291,19 +416,20 @@ static void xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, req->rq_slen); - if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { + if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { /* Protect against races with write_space */ spin_lock_bh(&xprt->transport_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) + else if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) xprt_wait_for_buffer_space(task); spin_unlock_bh(&xprt->transport_lock); @@ -327,6 +453,7 @@ static int xs_udp_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status; @@ -335,8 +462,10 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_len); req->rq_xtime = jiffies; - status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, xdr, req->rq_bytes_sent); + status = xs_sendpages(transport->sock, + (struct sockaddr *) &xprt->addr, + xprt->addrlen, xdr, + req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -392,6 +521,7 @@ static int xs_tcp_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status, retry = 0; @@ -406,8 +536,8 @@ static int xs_tcp_send_request(struct rpc_task *task) * called sendmsg(). */ while (1) { req->rq_xtime = jiffies; - status = xs_sendpages(xprt->sock, NULL, 0, xdr, - req->rq_bytes_sent); + status = xs_sendpages(transport->sock, + NULL, 0, xdr, req->rq_bytes_sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -485,8 +615,9 @@ out_release: */ static void xs_close(struct rpc_xprt *xprt) { - struct socket *sock = xprt->sock; - struct sock *sk = xprt->inet; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + struct sock *sk = transport->inet; if (!sk) goto clear_close_wait; @@ -494,13 +625,13 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); write_lock_bh(&sk->sk_callback_lock); - xprt->inet = NULL; - xprt->sock = NULL; + transport->inet = NULL; + transport->sock = NULL; sk->sk_user_data = NULL; - sk->sk_data_ready = xprt->old_data_ready; - sk->sk_state_change = xprt->old_state_change; - sk->sk_write_space = xprt->old_write_space; + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; write_unlock_bh(&sk->sk_callback_lock); sk->sk_no_check = 0; @@ -519,15 +650,18 @@ clear_close_wait: */ static void xs_destroy(struct rpc_xprt *xprt) { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&xprt->connect_worker); + cancel_delayed_work(&transport->connect_worker); flush_scheduled_work(); xprt_disconnect(xprt); xs_close(xprt); xs_free_peer_addresses(xprt); kfree(xprt->slot); + kfree(xprt); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -603,91 +737,75 @@ static void xs_udp_data_ready(struct sock *sk, int len) read_unlock(&sk->sk_callback_lock); } -static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) -{ - if (len > desc->count) - len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, p, len)) { - dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return 0; - } - desc->offset += len; - desc->count -= len; - dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return len; -} - -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); size_t len, used; char *p; - p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; - len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; - used = xs_tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; + p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset; + len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset; + used = xdr_skb_read_bits(desc, p, len); + transport->tcp_offset += used; if (used != len) return; - xprt->tcp_reclen = ntohl(xprt->tcp_recm); - if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) - xprt->tcp_flags |= XPRT_LAST_FRAG; + transport->tcp_reclen = ntohl(transport->tcp_fraghdr); + if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) + transport->tcp_flags |= TCP_RCV_LAST_FRAG; else - xprt->tcp_flags &= ~XPRT_LAST_FRAG; - xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; + transport->tcp_flags &= ~TCP_RCV_LAST_FRAG; + transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; - xprt->tcp_flags &= ~XPRT_COPY_RECM; - xprt->tcp_offset = 0; + transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR; + transport->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(xprt->tcp_reclen < 4)) { + if (unlikely(transport->tcp_reclen < 4)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_disconnect(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", - xprt->tcp_reclen); + transport->tcp_reclen); } -static void xs_tcp_check_recm(struct rpc_xprt *xprt) +static void xs_tcp_check_fraghdr(struct sock_xprt *transport) { - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); - if (xprt->tcp_offset == xprt->tcp_reclen) { - xprt->tcp_flags |= XPRT_COPY_RECM; - xprt->tcp_offset = 0; - if (xprt->tcp_flags & XPRT_LAST_FRAG) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; - xprt->tcp_flags |= XPRT_COPY_XID; - xprt->tcp_copied = 0; + if (transport->tcp_offset == transport->tcp_reclen) { + transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR; + transport->tcp_offset = 0; + if (transport->tcp_flags & TCP_RCV_LAST_FRAG) { + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + transport->tcp_flags |= TCP_RCV_COPY_XID; + transport->tcp_copied = 0; } } } -static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc) { size_t len, used; char *p; - len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; + len = sizeof(transport->tcp_xid) - transport->tcp_offset; dprintk("RPC: reading XID (%Zu bytes)\n", len); - p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; - used = xs_tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; + p = ((char *) &transport->tcp_xid) + transport->tcp_offset; + used = xdr_skb_read_bits(desc, p, len); + transport->tcp_offset += used; if (used != len) return; - xprt->tcp_flags &= ~XPRT_COPY_XID; - xprt->tcp_flags |= XPRT_COPY_DATA; - xprt->tcp_copied = 4; + transport->tcp_flags &= ~TCP_RCV_COPY_XID; + transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_copied = 4; dprintk("RPC: reading reply for XID %08x\n", - ntohl(xprt->tcp_xid)); - xs_tcp_check_recm(xprt); + ntohl(transport->tcp_xid)); + xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; @@ -695,116 +813,118 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, xprt->tcp_xid); + req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; dprintk("RPC: XID %08x request not found!\n", - ntohl(xprt->tcp_xid)); + ntohl(transport->tcp_xid)); spin_unlock(&xprt->transport_lock); return; } rcvbuf = &req->rq_private_buf; len = desc->count; - if (len > xprt->tcp_reclen - xprt->tcp_offset) { - skb_reader_t my_desc; + if (len > transport->tcp_reclen - transport->tcp_offset) { + struct xdr_skb_reader my_desc; - len = xprt->tcp_reclen - xprt->tcp_offset; + len = transport->tcp_reclen - transport->tcp_offset; memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - &my_desc, xs_tcp_copy_data); + r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, + &my_desc, xdr_skb_read_bits); desc->count -= r; desc->offset += r; } else - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - desc, xs_tcp_copy_data); + r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, + desc, xdr_skb_read_bits); if (r > 0) { - xprt->tcp_copied += r; - xprt->tcp_offset += r; + transport->tcp_copied += r; + transport->tcp_offset += r; } if (r != len) { /* Error when copying to the receive buffer, * usually because we weren't able to allocate * additional buffer pages. All we can do now - * is turn off XPRT_COPY_DATA, so the request + * is turn off TCP_RCV_COPY_DATA, so the request * will not receive any additional updates, * and time out. * Any remaining data from this record will * be discarded. */ - xprt->tcp_flags &= ~XPRT_COPY_DATA; + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; dprintk("RPC: XID %08x truncated request\n", - ntohl(xprt->tcp_xid)); + ntohl(transport->tcp_xid)); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + xprt, transport->tcp_copied, transport->tcp_offset, + transport->tcp_reclen); goto out; } dprintk("RPC: XID %08x read %Zd bytes\n", - ntohl(xprt->tcp_xid), r); + ntohl(transport->tcp_xid), r); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); - - if (xprt->tcp_copied == req->rq_private_buf.buflen) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - else if (xprt->tcp_offset == xprt->tcp_reclen) { - if (xprt->tcp_flags & XPRT_LAST_FRAG) - xprt->tcp_flags &= ~XPRT_COPY_DATA; + xprt, transport->tcp_copied, transport->tcp_offset, + transport->tcp_reclen); + + if (transport->tcp_copied == req->rq_private_buf.buflen) + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + else if (transport->tcp_offset == transport->tcp_reclen) { + if (transport->tcp_flags & TCP_RCV_LAST_FRAG) + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } out: - if (!(xprt->tcp_flags & XPRT_COPY_DATA)) - xprt_complete_rqst(req->rq_task, xprt->tcp_copied); + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) + xprt_complete_rqst(req->rq_task, transport->tcp_copied); spin_unlock(&xprt->transport_lock); - xs_tcp_check_recm(xprt); + xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) +static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) { size_t len; - len = xprt->tcp_reclen - xprt->tcp_offset; + len = transport->tcp_reclen - transport->tcp_offset; if (len > desc->count) len = desc->count; desc->count -= len; desc->offset += len; - xprt->tcp_offset += len; + transport->tcp_offset += len; dprintk("RPC: discarded %Zu bytes\n", len); - xs_tcp_check_recm(xprt); + xs_tcp_check_fraghdr(transport); } static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rpc_xprt *xprt = rd_desc->arg.data; - skb_reader_t desc = { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct xdr_skb_reader desc = { .skb = skb, .offset = offset, .count = len, - .csum = 0 }; dprintk("RPC: xs_tcp_data_recv started\n"); do { /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ - if (xprt->tcp_flags & XPRT_COPY_RECM) { + if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { xs_tcp_read_fraghdr(xprt, &desc); continue; } /* Read in the xid if necessary */ - if (xprt->tcp_flags & XPRT_COPY_XID) { - xs_tcp_read_xid(xprt, &desc); + if (transport->tcp_flags & TCP_RCV_COPY_XID) { + xs_tcp_read_xid(transport, &desc); continue; } /* Read in the request data */ - if (xprt->tcp_flags & XPRT_COPY_DATA) { + if (transport->tcp_flags & TCP_RCV_COPY_DATA) { xs_tcp_read_request(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ - xs_tcp_read_discard(xprt, &desc); + xs_tcp_read_discard(transport, &desc); } while (desc.count); dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; @@ -858,11 +978,16 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_ESTABLISHED: spin_lock_bh(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { + struct sock_xprt *transport = container_of(xprt, + struct sock_xprt, xprt); + /* Reset TCP record info */ - xprt->tcp_offset = 0; - xprt->tcp_reclen = 0; - xprt->tcp_copied = 0; - xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; + transport->tcp_offset = 0; + transport->tcp_reclen = 0; + transport->tcp_copied = 0; + transport->tcp_flags = + TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt_wake_pending_tasks(xprt, 0); } @@ -951,15 +1076,16 @@ static void xs_tcp_write_space(struct sock *sk) static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) { - struct sock *sk = xprt->inet; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; - if (xprt->rcvsize) { + if (transport->rcvsize) { sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; + sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; } - if (xprt->sndsize) { + if (transport->sndsize) { sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; + sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; sk->sk_write_space(sk); } } @@ -974,12 +1100,14 @@ static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) */ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) { - xprt->sndsize = 0; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + transport->sndsize = 0; if (sndsize) - xprt->sndsize = sndsize + 1024; - xprt->rcvsize = 0; + transport->sndsize = sndsize + 1024; + transport->rcvsize = 0; if (rcvsize) - xprt->rcvsize = rcvsize + 1024; + transport->rcvsize = rcvsize + 1024; xs_udp_do_set_buffer_size(xprt); } @@ -1003,19 +1131,6 @@ static unsigned short xs_get_random_port(void) } /** - * xs_print_peer_address - format an IPv4 address for printing - * @xprt: generic transport - * @format: flags field indicating which parts of the address to render - */ -static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) -{ - if (xprt->address_strings[format] != NULL) - return xprt->address_strings[format]; - else - return "unprintable"; -} - -/** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport * @port: new port number @@ -1030,20 +1145,20 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) sap->sin_port = htons(port); } -static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) +static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; int err; - unsigned short port = xprt->port; + unsigned short port = transport->port; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { - xprt->port = port; + transport->port = port; dprintk("RPC: xs_bindresvport bound to port %u\n", port); return 0; @@ -1052,22 +1167,53 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) port = xprt_max_resvport; else port--; - } while (err == -EADDRINUSE && port != xprt->port); + } while (err == -EADDRINUSE && port != transport->port); dprintk("RPC: can't bind to reserved port (%d).\n", -err); return err; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key xs_key[2]; +static struct lock_class_key xs_slock_key[2]; + +static inline void xs_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", + &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", + &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void xs_reclassify_socket(struct socket *sock) +{ +} +#endif + /** * xs_udp_connect_worker - set up a UDP socket - * @args: RPC transport to connect + * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(void *args) +static void xs_udp_connect_worker(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *) args; - struct socket *sock = xprt->sock; + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; int err, status = -EIO; if (xprt->shutdown || !xprt_bound(xprt)) @@ -1080,24 +1226,25 @@ static void xs_udp_connect_worker(void *args) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { + if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { sock_release(sock); goto out; } dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!xprt->inet) { + if (!transport->inet) { struct sock *sk = sock->sk; write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = xprt; - xprt->old_data_ready = sk->sk_data_ready; - xprt->old_state_change = sk->sk_state_change; - xprt->old_write_space = sk->sk_write_space; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; @@ -1106,8 +1253,8 @@ static void xs_udp_connect_worker(void *args) xprt_set_connected(xprt); /* Reset to new socket */ - xprt->sock = sock; - xprt->inet = sk; + transport->sock = sock; + transport->inet = sk; write_unlock_bh(&sk->sk_callback_lock); } @@ -1125,7 +1272,7 @@ out: static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) { int result; - struct socket *sock = xprt->sock; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1136,7 +1283,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = kernel_connect(sock, &any, sizeof(any), 0); + result = kernel_connect(transport->sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1144,27 +1291,30 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) /** * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint - * @args: RPC transport to connect + * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(void *args) +static void xs_tcp_connect_worker(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *)args; - struct socket *sock = xprt->sock; + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; int err, status = -EIO; if (xprt->shutdown || !xprt_bound(xprt)) goto out; - if (!xprt->sock) { + if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { + if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1173,17 +1323,17 @@ static void xs_tcp_connect_worker(void *args) xs_tcp_reuse_connection(xprt); dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!xprt->inet) { + if (!transport->inet) { struct sock *sk = sock->sk; write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = xprt; - xprt->old_data_ready = sk->sk_data_ready; - xprt->old_state_change = sk->sk_state_change; - xprt->old_write_space = sk->sk_write_space; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; @@ -1198,8 +1348,8 @@ static void xs_tcp_connect_worker(void *args) xprt_clear_connected(xprt); /* Reset to new socket */ - xprt->sock = sock; - xprt->inet = sk; + transport->sock = sock; + transport->inet = sk; write_unlock_bh(&sk->sk_callback_lock); } @@ -1248,21 +1398,22 @@ out_clear: static void xs_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); if (xprt_test_and_set_connecting(xprt)) return; - if (xprt->sock != NULL) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&xprt->connect_worker, + schedule_delayed_work(&transport->connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_work(&xprt->connect_worker); + schedule_delayed_work(&transport->connect_worker, 0); /* flush_scheduled_work can sleep... */ if (!RPC_IS_ASYNC(task)) @@ -1278,8 +1429,10 @@ static void xs_connect(struct rpc_task *task) */ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - xprt->port, + transport->port, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -1296,13 +1449,14 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) */ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); long idle_time = 0; if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - xprt->port, + transport->port, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -1316,7 +1470,6 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, - .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .rpcbind = rpc_getport, @@ -1334,7 +1487,6 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { - .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .rpcbind = rpc_getport, @@ -1349,33 +1501,64 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; +static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size) +{ + struct rpc_xprt *xprt; + struct sock_xprt *new; + + if (addrlen > sizeof(xprt->addr)) { + dprintk("RPC: xs_setup_xprt: address too large\n"); + return ERR_PTR(-EBADF); + } + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) { + dprintk("RPC: xs_setup_xprt: couldn't allocate rpc_xprt\n"); + return ERR_PTR(-ENOMEM); + } + xprt = &new->xprt; + + xprt->max_reqs = slot_table_size; + xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) { + kfree(xprt); + dprintk("RPC: xs_setup_xprt: couldn't allocate slot table\n"); + return ERR_PTR(-ENOMEM); + } + + memcpy(&xprt->addr, addr, addrlen); + xprt->addrlen = addrlen; + new->port = xs_get_random_port(); + + return xprt; +} + /** * xs_setup_udp - Set up transport to use a UDP socket - * @xprt: transport to set up + * @addr: address of remote server + * @addrlen: length of address in bytes * @to: timeout parameters * */ -int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) { - size_t slot_table_size; - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; - xprt->max_reqs = xprt_udp_slot_table_entries; - slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); - xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); - if (xprt->slot == NULL) - return -ENOMEM; + xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(addr->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) xprt_set_bound(xprt); - xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); + INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1390,37 +1573,36 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", - xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + xprt->address_strings[RPC_DISPLAY_ALL]); - return 0; + return xprt; } /** * xs_setup_tcp - Set up transport to use a TCP socket - * @xprt: transport to set up + * @addr: address of remote server + * @addrlen: length of address in bytes * @to: timeout parameters * */ -int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) { - size_t slot_table_size; - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; - xprt->max_reqs = xprt_tcp_slot_table_entries; - slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); - xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); - if (xprt->slot == NULL) - return -ENOMEM; + xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(addr->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) xprt_set_bound(xprt); - xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1435,7 +1617,40 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", - xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + xprt->address_strings[RPC_DISPLAY_ALL]); + + return xprt; +} + +/** + * init_socket_xprt - set up xprtsock's sysctls + * + */ +int init_socket_xprt(void) +{ +#ifdef RPC_DEBUG + if (!sunrpc_table_header) { + sunrpc_table_header = register_sysctl_table(sunrpc_table, 1); +#ifdef CONFIG_PROC_FS + if (sunrpc_table[0].de) + sunrpc_table[0].de->owner = THIS_MODULE; +#endif + } +#endif return 0; } + +/** + * cleanup_socket_xprt - remove xprtsock's sysctls + * + */ +void cleanup_socket_xprt(void) +{ +#ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +#endif +} diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 1bb75703f38..730c5c47ed8 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -774,8 +774,8 @@ int tipc_bclink_set_queue_limits(u32 limit) int tipc_bclink_init(void) { - bcbearer = kmalloc(sizeof(*bcbearer), GFP_ATOMIC); - bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC); + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); + bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); if (!bcbearer || !bclink) { nomem: warn("Multicast link creation failed, no memory\n"); @@ -786,14 +786,12 @@ int tipc_bclink_init(void) return -ENOMEM; } - memset(bcbearer, 0, sizeof(struct bcbearer)); INIT_LIST_HEAD(&bcbearer->bearer.cong_links); bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; sprintf(bcbearer->media.name, "tipc-multicast"); bcl = &bclink->link; - memset(bclink, 0, sizeof(struct bclink)); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); diff --git a/net/tipc/config.c b/net/tipc/config.c index ed1351ed05e..baf55c459c8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -107,7 +107,7 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) { struct sk_buff *buf; - u32 value_net; + __be32 value_net; buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value))); if (buf) { @@ -208,7 +208,7 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg, if (mng.link_subscriptions > 64) break; - sub = (struct subscr_data *)kmalloc(sizeof(*sub), + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (sub == NULL) { warn("Memory squeeze; dropped remote link subscription\n"); @@ -284,8 +284,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - addr = *(u32 *)TLV_DATA(req_tlv_area); - addr = ntohl(addr); + addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (addr == tipc_own_addr) return tipc_cfg_reply_none(); if (!tipc_addr_node_valid(addr)) @@ -319,8 +318,7 @@ static struct sk_buff *cfg_set_remote_mng(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); tipc_remote_management = (value != 0); return tipc_cfg_reply_none(); } @@ -332,8 +330,7 @@ static struct sk_buff *cfg_set_max_publications(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value != delimit(value, 1, 65535)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max publications must be 1-65535)"); @@ -348,8 +345,7 @@ static struct sk_buff *cfg_set_max_subscriptions(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value != delimit(value, 1, 65535)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max subscriptions must be 1-65535"); @@ -363,8 +359,7 @@ static struct sk_buff *cfg_set_max_ports(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value == tipc_max_ports) return tipc_cfg_reply_none(); if (value != delimit(value, 127, 65535)) @@ -383,8 +378,7 @@ static struct sk_buff *cfg_set_max_zones(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value == tipc_max_zones) return tipc_cfg_reply_none(); if (value != delimit(value, 1, 255)) @@ -403,8 +397,7 @@ static struct sk_buff *cfg_set_max_clusters(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value != delimit(value, 1, 1)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max clusters fixed at 1)"); @@ -417,8 +410,7 @@ static struct sk_buff *cfg_set_max_nodes(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value == tipc_max_nodes) return tipc_cfg_reply_none(); if (value != delimit(value, 8, 2047)) @@ -437,8 +429,7 @@ static struct sk_buff *cfg_set_max_slaves(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value != 0) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (max secondary nodes fixed at 0)"); @@ -451,8 +442,7 @@ static struct sk_buff *cfg_set_netid(void) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value == tipc_net_id) return tipc_cfg_reply_none(); if (value != delimit(value, 1, 9999)) diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index d8af4c28695..627f99b7afd 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -393,8 +393,7 @@ struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = *(u32 *)TLV_DATA(req_tlv_area); - value = ntohl(value); + value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (value != delimit(value, 0, 32768)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (log size must be 0-32768)"); diff --git a/net/tipc/handler.c b/net/tipc/handler.c index ae6ddf00a1a..eb80778d6d9 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -42,7 +42,7 @@ struct queue_item { unsigned long data; }; -static kmem_cache_t *tipc_queue_item_cache; +static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); static int handler_enabled = 0; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 03bd659c43c..7bf87cb26ef 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -66,11 +66,11 @@ */ struct distr_item { - u32 type; - u32 lower; - u32 upper; - u32 ref; - u32 key; + __be32 type; + __be32 lower; + __be32 upper; + __be32 ref; + __be32 key; }; /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 886bda5e88d..4111a31def7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -60,7 +60,7 @@ struct node *tipc_node_create(u32 addr) struct node *n_ptr; struct node **curr_node; - n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC); + n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC); if (!n_ptr) { warn("Node creation failed, no memory\n"); return NULL; @@ -75,7 +75,6 @@ struct node *tipc_node_create(u32 addr) return NULL; } - memset(n_ptr, 0, sizeof(*n_ptr)); n_ptr->addr = addr; spin_lock_init(&n_ptr->lock); INIT_LIST_HEAD(&n_ptr->nsub); @@ -597,8 +596,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - domain = *(u32 *)TLV_DATA(req_tlv_area); - domain = ntohl(domain); + domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (!tipc_addr_domain_valid(domain)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); @@ -642,8 +640,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - domain = *(u32 *)TLV_DATA(req_tlv_area); - domain = ntohl(domain); + domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); if (!tipc_addr_domain_valid(domain)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); @@ -664,8 +661,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLV for broadcast link */ - link_info.dest = tipc_own_addr & 0xfffff00; - link_info.dest = htonl(link_info.dest); + link_info.dest = htonl(tipc_own_addr & 0xfffff00); link_info.up = htonl(1); sprintf(link_info.str, tipc_bclink_name); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); diff --git a/net/tipc/port.c b/net/tipc/port.c index c1a1a76759b..b7f3199523c 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -1136,11 +1136,12 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) int res = -EINVAL; p_ptr = tipc_port_lock(ref); + if (!p_ptr) + return -EINVAL; + dbg("tipc_publ %u, p_ptr = %x, conn = %x, scope = %x, " "lower = %u, upper = %u\n", ref, p_ptr, p_ptr->publ.connected, scope, seq->lower, seq->upper); - if (!p_ptr) - return -EINVAL; if (p_ptr->publ.connected) goto exit; if (seq->lower > seq->upper) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7a918f12a5d..ddade7388aa 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -350,7 +350,7 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Allocate subscription object */ - sub = kmalloc(sizeof(*sub), GFP_ATOMIC); + sub = kzalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { warn("Subscription rejected, no memory\n"); subscr_terminate(subscriber); @@ -359,7 +359,6 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Initialize subscription object */ - memset(sub, 0, sizeof(*sub)); sub->seq.type = htohl(s->seq.type, subscriber->swap); sub->seq.lower = htohl(s->seq.lower, subscriber->swap); sub->seq.upper = htohl(s->seq.upper, subscriber->swap); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b43a27828df..2f208c7f4d4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -151,8 +151,9 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) * each socket state is protected by separate rwlock. */ -static inline unsigned unix_hash_fold(unsigned hash) +static inline unsigned unix_hash_fold(__wsum n) { + unsigned hash = (__force unsigned)n; hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 746c2f4a5fa..f14ad6635fc 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ atomic_t unix_tot_inflight = ATOMIC_INIT(0); static struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; /* * Socket ? diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 6f39faa1583..c2059733e15 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -13,7 +13,7 @@ * Due Credit: * Wanpipe socket layer is based on Packet and * the X25 socket layers. The above sockets were -* used for the specific use of Sangoma Technoloiges +* used for the specific use of Sangoma Technologies * API programs. * Packet socket Authors: Ross Biro, Fred N. van Kempen and * Alan Cox. @@ -23,7 +23,7 @@ * Apr 25, 2000 Nenad Corbic o Added the ability to send zero length packets. * Mar 13, 2000 Nenad Corbic o Added a tx buffer check via ioctl call. * Mar 06, 2000 Nenad Corbic o Fixed the corrupt sock lcn problem. -* Server and client applicaton can run +* Server and client application can run * simultaneously without conflicts. * Feb 29, 2000 Nenad Corbic o Added support for PVC protocols, such as * CHDLC, Frame Relay and HDLC API. diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 9479659277a..769cdd62c1b 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -3,7 +3,7 @@ * * This module is completely hardware-independent and provides * the following common services for the WAN Link Drivers: -* o WAN device managenment (registering, unregistering) +* o WAN device management (registering, unregistering) * o Network interface management * o Physical connection management (dial-up, incoming calls) * o Logical connection management (switched virtual circuits) @@ -62,63 +62,6 @@ #define KMEM_SAFETYZONE 8 -/***********FOR DEBUGGING PURPOSES********************************************* -static void * dbg_kmalloc(unsigned int size, int prio, int line) { - int i = 0; - void * v = kmalloc(size+sizeof(unsigned int)+2*KMEM_SAFETYZONE*8,prio); - char * c1 = v; - c1 += sizeof(unsigned int); - *((unsigned int *)v) = size; - - for (i = 0; i < KMEM_SAFETYZONE; i++) { - c1[0] = 'D'; c1[1] = 'E'; c1[2] = 'A'; c1[3] = 'D'; - c1[4] = 'B'; c1[5] = 'E'; c1[6] = 'E'; c1[7] = 'F'; - c1 += 8; - } - c1 += size; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - c1[0] = 'M'; c1[1] = 'U'; c1[2] = 'N'; c1[3] = 'G'; - c1[4] = 'W'; c1[5] = 'A'; c1[6] = 'L'; c1[7] = 'L'; - c1 += 8; - } - v = ((char *)v) + sizeof(unsigned int) + KMEM_SAFETYZONE*8; - printk(KERN_INFO "line %d kmalloc(%d,%d) = %p\n",line,size,prio,v); - return v; -} -static void dbg_kfree(void * v, int line) { - unsigned int * sp = (unsigned int *)(((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8)); - unsigned int size = *sp; - char * c1 = ((char *)v) - KMEM_SAFETYZONE*8; - int i = 0; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - if ( c1[0] != 'D' || c1[1] != 'E' || c1[2] != 'A' || c1[3] != 'D' - || c1[4] != 'B' || c1[5] != 'E' || c1[6] != 'E' || c1[7] != 'F') { - printk(KERN_INFO "kmalloced block at %p has been corrupted (underrun)!\n",v); - printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8, - c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] ); - } - c1 += 8; - } - c1 += size; - for (i = 0; i < KMEM_SAFETYZONE; i++) { - if ( c1[0] != 'M' || c1[1] != 'U' || c1[2] != 'N' || c1[3] != 'G' - || c1[4] != 'W' || c1[5] != 'A' || c1[6] != 'L' || c1[7] != 'L' - ) { - printk(KERN_INFO "kmalloced block at %p has been corrupted (overrun):\n",v); - printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8, - c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] ); - } - c1 += 8; - } - printk(KERN_INFO "line %d kfree(%p)\n",line,v); - v = ((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8); - kfree(v); -} - -#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__) -#define kfree(x) dbg_kfree(x,__LINE__) -*****************************************************************************/ - /* * Function Prototypes */ diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5a0dbeb6bbe..6b381fc0383 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -119,6 +119,23 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 160 } }, +{ + .name = "xcbc(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, }; static struct xfrm_algo_desc ealg_list[] = { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e8198a2c785..414f8907038 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -12,7 +12,7 @@ #include <net/ip.h> #include <net/xfrm.h> -static kmem_cache_t *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __read_mostly; void __secpath_destroy(struct sec_path *sp) { @@ -27,7 +27,7 @@ struct sec_path *secpath_dup(struct sec_path *src) { struct sec_path *sp; - sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC); + sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC); if (!sp) return NULL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7736b23c3f0..bebd40e5a62 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -25,6 +25,7 @@ #include <linux/cache.h> #include <net/xfrm.h> #include <net/ip.h> +#include <linux/audit.h> #include "xfrm_hash.h" @@ -39,7 +40,7 @@ EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; static HLIST_HEAD(xfrm_policy_gc_list); @@ -50,6 +51,40 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); +static inline int +__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +{ + return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + (fl->proto == sel->proto || !sel->proto) && + (fl->oif == sel->ifindex || !sel->ifindex); +} + +static inline int +__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +{ + return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + (fl->proto == sel->proto || !sel->proto) && + (fl->oif == sel->ifindex || !sel->ifindex); +} + +int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_selector_match(sel, fl); + case AF_INET6: + return __xfrm6_selector_match(sel, fl); + } + return 0; +} + int xfrm_register_type(struct xfrm_type *type, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); @@ -358,7 +393,7 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) xfrm_pol_put(policy); } -static void xfrm_policy_gc_task(void *data) +static void xfrm_policy_gc_task(struct work_struct *work) { struct xfrm_policy *policy; struct hlist_node *entry, *tmp; @@ -546,7 +581,7 @@ static inline int xfrm_byidx_should_resize(int total) static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(void *__unused) +static void xfrm_hash_resize(struct work_struct *__unused) { int dir, total; @@ -563,7 +598,7 @@ static void xfrm_hash_resize(void *__unused) mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ @@ -770,7 +805,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type) +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) { int dir; @@ -790,6 +825,9 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -808,6 +846,11 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, + pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -826,33 +869,12 @@ EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol; + struct xfrm_policy *pol, *last = NULL; struct hlist_node *entry; - int dir, count, error; + int dir, last_dir = 0, count, error; read_lock_bh(&xfrm_policy_lock); count = 0; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type == type) - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type == type) - count++; - } - } - } - - if (count == 0) { - error = -ENOENT; - goto out; - } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { struct hlist_head *table = xfrm_policy_bydst[dir].table; @@ -862,21 +884,37 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) &xfrm_policy_inexact[dir], bydst) { if (pol->type != type) continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); - if (error) - goto out; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; + count++; } for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) { if (pol->type != type) continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); - if (error) - goto out; + if (last) { + error = func(last, last_dir % XFRM_POLICY_MAX, + count, data); + if (error) + goto out; + } + last = pol; + last_dir = dir; + count++; } } } - error = 0; + if (count == 0) { + error = -ENOENT; + goto out; + } + error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -1177,6 +1215,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { error = xfrm_get_saddr(&tmp, remote, family); if (error) @@ -1894,7 +1933,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; - if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol)) + if (fl && pol && + !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; @@ -1946,6 +1986,117 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); +#ifdef CONFIG_AUDITSYSCALL +/* Audit addition and deletion of SAs and ipsec policy */ + +void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x) +{ + + char *secctx; + u32 secctx_len; + struct xfrm_sec_ctx *sctx = NULL; + struct audit_buffer *audit_buf; + int family; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + + audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); + if (audit_buf == NULL) + return; + + switch(type) { + case AUDIT_MAC_IPSEC_ADDSA: + audit_log_format(audit_buf, "SAD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSA: + audit_log_format(audit_buf, "SAD delete: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_ADDSPD: + audit_log_format(audit_buf, "SPD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSPD: + audit_log_format(audit_buf, "SPD delete: auid=%u", auid); + break; + default: + return; + } + + if (sid != 0 && + security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + else + audit_log_task_context(audit_buf); + + if (xp) { + family = xp->selector.family; + if (xp->security) + sctx = xp->security; + } else { + family = x->props.family; + if (x->security) + sctx = x->security; + } + + if (sctx) + audit_log_format(audit_buf, + " sec_alg=%u sec_doi=%u sec_obj=%s", + sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); + + switch(family) { + case AF_INET: + { + struct in_addr saddr, daddr; + if (xp) { + saddr.s_addr = xp->selector.saddr.a4; + daddr.s_addr = xp->selector.daddr.a4; + } else { + saddr.s_addr = x->props.saddr.a4; + daddr.s_addr = x->id.daddr.a4; + } + audit_log_format(audit_buf, + " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(saddr), NIPQUAD(daddr)); + } + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + if (xp) { + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + } else { + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + } + audit_log_format(audit_buf, + " src=" NIP6_FMT "dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } + + if (x) + audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", + (unsigned long)ntohl(x->id.spi), + (unsigned long)ntohl(x->id.spi), + x->id.proto == IPPROTO_AH ? "AH" : + (x->id.proto == IPPROTO_ESP ? + "ESP" : "IPCOMP")); + + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} + +EXPORT_SYMBOL(xfrm_audit_log); +#endif /* CONFIG_AUDITSYSCALL */ + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -2080,7 +2231,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 899de9ed22a..fdb08d9f34a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/cache.h> #include <asm/uaccess.h> +#include <linux/audit.h> #include "xfrm_hash.h" @@ -115,7 +116,7 @@ static unsigned long xfrm_hash_new_size(void) static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(void *__unused) +static void xfrm_hash_resize(struct work_struct *__unused) { struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; unsigned long nsize, osize; @@ -168,7 +169,7 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -207,7 +208,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(void *data) +static void xfrm_state_gc_task(struct work_struct *data) { struct xfrm_state *x; struct hlist_node *entry, *tmp; @@ -238,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data) unsigned long now = (unsigned long)xtime.tv_sec; long next = LONG_MAX; int warn = 0; + int err = 0; spin_lock(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -295,9 +297,14 @@ expired: next = 2; goto resched; } - if (!__xfrm_state_delete(x) && x->id.spi) + + err = __xfrm_state_delete(x); + if (!err && x->id.spi) km_state_expired(x, 1, 0); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + out: spin_unlock(&x->lock); } @@ -384,9 +391,10 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto) +void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) { int i; + int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i <= xfrm_state_hmask; i++) { @@ -399,7 +407,11 @@ restart: xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_delete(x); + err = xfrm_state_delete(x); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSA, + err ? 0 : 1, NULL, x); xfrm_state_put(x); spin_lock_bh(&xfrm_state_lock); @@ -1099,7 +1111,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *data) { int i; - struct xfrm_state *x; + struct xfrm_state *x, *last = NULL; struct hlist_node *entry; int count = 0; int err = 0; @@ -1107,24 +1119,22 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (xfrm_id_proto_match(x->id.proto, proto)) - count++; + if (!xfrm_id_proto_match(x->id.proto, proto)) + continue; + if (last) { + err = func(last, count, data); + if (err) + goto out; + } + last = x; + count++; } } if (count == 0) { err = -ENOENT; goto out; } - - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) - continue; - err = func(x, --count, data); - if (err) - goto out; - } - } + err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); return err; @@ -1304,7 +1314,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) } EXPORT_SYMBOL(km_query); -int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport) +int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { int err = -EINVAL; struct xfrm_mgr *km; @@ -1568,6 +1578,6 @@ void __init xfrm_state_init(void) panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); + INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2b2e59d8ffb..e5372b11fc8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <linux/in6.h> #endif +#include <linux/audit.h> static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -244,11 +245,10 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, *props = algo->desc.sadb_alg_id; len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; - p = kmalloc(len, GFP_KERNEL); + p = kmemdup(ualg, len, GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, ualg, len); strcpy(p->alg_name, algo->name); *algpp = p; return 0; @@ -263,11 +263,10 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a return 0; uencap = RTA_DATA(rta); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kmemdup(uencap, sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, uencap, sizeof(*p)); *encapp = p; return 0; } @@ -305,11 +304,10 @@ static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) return 0; uaddrp = RTA_DATA(rta); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kmemdup(uaddrp, sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, uaddrp, sizeof(*p)); *addrpp = p; return 0; } @@ -323,7 +321,7 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * x->props.replay_window = p->replay_window; x->props.reqid = p->reqid; x->props.family = p->family; - x->props.saddr = p->saddr; + memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; } @@ -457,6 +455,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) else err = xfrm_state_update(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -495,6 +496,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, goto out; } + err = -ESRCH; x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, p->family); } @@ -525,6 +527,10 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) } err = xfrm_state_delete(x); + + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -545,7 +551,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); memcpy(&p->stats, &x->stats, sizeof(p->stats)); - p->saddr = x->props.saddr; + memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; p->reqid = x->props.reqid; @@ -652,7 +658,6 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, if (!skb) return ERR_PTR(-ENOMEM); - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; @@ -772,7 +777,7 @@ out_noput: return err; } -static int verify_policy_dir(__u8 dir) +static int verify_policy_dir(u8 dir) { switch (dir) { case XFRM_POLICY_IN: @@ -787,7 +792,7 @@ static int verify_policy_dir(__u8 dir) return 0; } -static int verify_policy_type(__u8 type) +static int verify_policy_type(u8 type) { switch (type) { case XFRM_POLICY_TYPE_MAIN: @@ -874,22 +879,57 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, t->aalgos = ut->aalgos; t->ealgos = ut->ealgos; t->calgos = ut->calgos; + t->encap_family = ut->family; } } +static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) +{ + int i; + + if (nr > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < nr; i++) { + /* We never validated the ut->family value, so many + * applications simply leave it at zero. The check was + * never made and ut->family was ignored because all + * templates could be assumed to have the same family as + * the policy itself. Now that we will have ipv4-in-ipv6 + * and ipv6-in-ipv4 tunnels, this is no longer true. + */ + if (!ut[i].family) + ut[i].family = family; + + switch (ut[i].family) { + case AF_INET: + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + break; +#endif + default: + return -EINVAL; + }; + } + + return 0; +} + static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) { struct rtattr *rt = xfrma[XFRMA_TMPL-1]; - struct xfrm_user_tmpl *utmpl; - int nr; if (!rt) { pol->xfrm_nr = 0; } else { - nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + struct xfrm_user_tmpl *utmpl = RTA_DATA(rt); + int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + int err; - if (nr > XFRM_MAX_DEPTH) - return -EINVAL; + err = validate_tmpl(nr, utmpl, pol->family); + if (err) + return err; copy_templates(pol, RTA_DATA(rt), nr); } @@ -900,7 +940,7 @@ static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) { struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; struct xfrm_userpolicy_type *upt; - __u8 type = XFRM_POLICY_TYPE_MAIN; + u8 type = XFRM_POLICY_TYPE_MAIN; int err; if (rt) { @@ -998,6 +1038,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + if (err) { security_xfrm_policy_free(xp); kfree(xp); @@ -1027,7 +1070,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; memcpy(&up->id, &kp->id, sizeof(up->id)); - up->family = xp->family; + up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); up->reqid = kp->reqid; up->mode = kp->mode; @@ -1082,12 +1125,12 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s } #ifdef CONFIG_XFRM_SUB_POLICY -static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { struct xfrm_userpolicy_type upt; memset(&upt, 0, sizeof(upt)); - upt.type = xp->type; + upt.type = type; RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); @@ -1098,7 +1141,7 @@ rtattr_failure: } #else -static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +static inline int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { return 0; } @@ -1127,7 +1170,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) + if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -1170,7 +1213,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, if (!skb) return ERR_PTR(-ENOMEM); - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; @@ -1189,7 +1231,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; - __u8 type = XFRM_POLICY_TYPE_MAIN; + u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1226,6 +1268,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } + if (delete) + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -1260,8 +1306,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma { struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_audit audit_info; - xfrm_state_flush(p->proto); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_state_flush(p->proto, &audit_info); c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1283,10 +1332,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve id = NLMSG_DATA(nlh); nlh->nlmsg_flags = 0; - id->sa_id.daddr = x->id.daddr; + memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; id->sa_id.proto = x->id.proto; + memcpy(&id->saddr, &x->props.saddr,sizeof(x->props.saddr)); + id->reqid = x->props.reqid; id->flags = c->data.aevent; RTA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); @@ -1407,14 +1458,17 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct km_event c; - __u8 type = XFRM_POLICY_TYPE_MAIN; + u8 type = XFRM_POLICY_TYPE_MAIN; int err; + struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); if (err) return err; - xfrm_policy_flush(type); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_policy_flush(type, &audit_info); c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1428,7 +1482,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; - __u8 type = XFRM_POLICY_TYPE_MAIN; + u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); @@ -1469,6 +1523,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL); + } else { // reset the timers here? printk("Dont know what to do with soft policy expire\n"); @@ -1500,8 +1557,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void ** goto out; km_state_expired(x, ue->hard, current->pid); - if (ue->hard) + if (ue->hard) { __xfrm_state_delete(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); + } out: spin_unlock_bh(&x->lock); xfrm_state_put(x); @@ -1530,7 +1590,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xf } /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); if (!xp) { + xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); + if (!xp) { kfree(x); return err; } @@ -1907,7 +1968,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_state_sec_ctx(x, skb)) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) + if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -1927,6 +1988,9 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); +#ifdef CONFIG_XFRM_SUB_POLICY + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); +#endif skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1976,7 +2040,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return NULL; nr = ((len - sizeof(*p)) / sizeof(*ut)); - if (nr > XFRM_MAX_DEPTH) + if (validate_tmpl(nr, ut, p->sel.family)) return NULL; if (p->dir > XFRM_POLICY_OUT) @@ -2014,7 +2078,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) + if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; upe->hard = !!hard; @@ -2034,6 +2098,9 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); +#ifdef CONFIG_XFRM_SUB_POLICY + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); +#endif skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2060,6 +2127,9 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * len += RTA_SPACE(headlen); headlen = sizeof(*id); } +#ifdef CONFIG_XFRM_SUB_POLICY + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); +#endif len += NLMSG_SPACE(headlen); skb = alloc_skb(len, GFP_ATOMIC); @@ -2087,7 +2157,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) + if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2106,10 +2176,11 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; + int len = 0; #ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_userpolicy_type upt; + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); #endif - int len = NLMSG_LENGTH(0); + len += NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) @@ -2119,12 +2190,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); nlh->nlmsg_flags = 0; - -#ifdef CONFIG_XFRM_SUB_POLICY - memset(&upt, 0, sizeof(upt)); - upt.type = c->data.type; - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); -#endif + if (copy_to_user_policy_type(c->data.type, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2132,9 +2199,6 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: -#ifdef CONFIG_XFRM_SUB_POLICY -rtattr_failure: -#endif kfree_skb(skb); return -1; } |