From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:24:19 -0700 Subject: [NETFILTER]: reduce netfilter sk_buff enlargement As discussed at netconf'05, we're trying to save every bit in sk_buff. The patch below makes sk_buff 8 bytes smaller. I did some basic testing on my notebook and it seems to work. The only real in-tree user of nfcache was IPVS, who only needs a single bit. Unfortunately I couldn't find some other free bit in sk_buff to stuff that bit into, so I introduced a separate field for them. Maybe the IPVS guys can resolve that to further save space. Initially I wanted to shrink pkt_type to three bits (PACKET_HOST and alike are only 6 values defined), but unfortunately the bluetooth code overloads pkt_type :( The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just came up with a way how to do it without any skb fields, so it's safe to remove it. - remove all never-implemented 'nfcache' code - don't have ipvs code abuse 'nfcache' field. currently get's their own compile-conditional skb->ipvs_property field. IPVS maintainers can decide to move this bit elswhere, but nfcache needs to die. - remove skb->nfcache field to save 4 bytes - move skb->nfctinfo into three unused bits to save further 4 bytes Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 16 ++-------------- net/ipv6/netfilter/ip6_queue.c | 1 - net/ipv6/netfilter/ip6_tables.c | 1 - net/ipv6/netfilter/ip6t_MARK.c | 5 ++--- 4 files changed, 4 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ae652ca14bc..590d2b79719 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -185,19 +185,6 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif -static inline int ip6_maybe_reroute(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER - if (skb->nfcache & NFC_ALTERED){ - if (ip6_route_me_harder(skb) != 0){ - kfree_skb(skb); - return -EINVAL; - } - } -#endif /* CONFIG_NETFILTER */ - return dst_output(skb); -} - /* * xmit an sk_buff (used by TCP) */ @@ -266,7 +253,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); + return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); } if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index a16df5b27c8..83ccedceed1 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -388,7 +388,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on local out, as the QUEUE target never diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73034511c8d..41a67cf6e33 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, &protoff, &offset)) { struct ip6t_entry_target *t; diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index d09ceb05013..81924fcc585 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -28,10 +28,9 @@ target(struct sk_buff **pskb, { const struct ip6t_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IP6T_CONTINUE; } -- cgit v1.2.3-70-g09d2 From f2ccd8fa06c8e302116e71df372f5c1f83432e03 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:34:12 -0700 Subject: [NET]: Kill skb->real_dev Bonding just wants the device before the skb_bond() decapsulation occurs, so simply pass that original device into packet_type->func() as an argument. It remains to be seen whether we can use this same exact thing to get rid of skb->input_dev as well. Signed-off-by: David S. Miller --- drivers/block/aoe/aoenet.c | 2 +- drivers/net/bonding/bond_3ad.c | 11 ++++------- drivers/net/bonding/bond_3ad.h | 2 +- drivers/net/bonding/bond_alb.c | 5 ++--- drivers/net/hamradio/bpqether.c | 4 ++-- drivers/net/pppoe.c | 6 ++++-- drivers/net/wan/hdlc_generic.c | 2 +- drivers/net/wan/lapbether.c | 2 +- drivers/net/wan/syncppp.c | 2 +- include/linux/if_vlan.h | 1 - include/linux/netdevice.h | 10 ++++++---- include/linux/skbuff.h | 2 -- include/net/arp.h | 2 +- include/net/ax25.h | 2 +- include/net/datalink.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 3 ++- include/net/llc.h | 8 +++++--- include/net/p8022.h | 3 ++- include/net/psnap.h | 2 +- include/net/x25.h | 2 +- net/802/p8022.c | 3 ++- net/802/psnap.c | 7 ++++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 6 +++--- net/ax25/ax25_in.c | 8 ++++---- net/core/dev.c | 35 +++++++++++++++++++---------------- net/core/skbuff.c | 2 -- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ipconfig.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/irlap_frame.c | 2 +- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 3 ++- net/llc/llc_input.c | 4 ++-- net/netrom/nr_dev.c | 2 +- net/packet/af_packet.c | 6 +++--- net/x25/x25_dev.c | 2 +- 45 files changed, 96 insertions(+), 91 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 9e6f51c528b..4be976940f6 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl) * (1) len doesn't include the header by default. I want this. */ static int -aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) +aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) { struct aoe_hdr *h; u32 n; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a2e8dda5afa..d2f34d5a808 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2419,22 +2419,19 @@ out: return 0; } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) { struct bonding *bond = dev->priv; struct slave *slave = NULL; int ret = NET_RX_DROP; - if (!(dev->flags & IFF_MASTER)) { + if (!(dev->flags & IFF_MASTER)) goto out; - } read_lock(&bond->lock); - slave = bond_get_slave_by_dev((struct bonding *)dev->priv, - skb->real_dev); - if (slave == NULL) { + slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev); + if (!slave) goto out_unlock; - } bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index f4682389418..673a30af566 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave); void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); #endif //__BOND_3AD_H__ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 19e829b567d..f8fce396119 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) _unlock_rx_hashtbl(bond); } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) +static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (!(bond_dev->flags & IFF_MASTER)) { + if (!(bond_dev->flags & IFF_MASTER)) goto out; - } if (!arp) { dprintk("Packet has no ARP data\n"); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index ba9f0580e1f..2946e037a9b 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; static char bpq_eth_addr[6]; -static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static const char *bpq_print_ethaddr(const unsigned char *); @@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct net_device *dev) /* * Receive an AX.25 frame via an ethernet interface. */ -static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ce1a9bf7b9a..82f236cc3b9 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -377,7 +377,8 @@ abort_kfree: ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; @@ -426,7 +427,8 @@ out: ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c index a63f6a2cc4f..cdd4c09c2d9 100644 --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c @@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev) static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *p) + struct packet_type *p, struct net_device *orig_dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto.netif_rx) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 7f2e3653c5e..6c302e9dbca 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) /* * Receive a LAPB frame via an ethernet interface. */ -static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len, err; struct lapbethdev *lapbeth; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 84b65c60c79..f58c794a963 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, u16 len) * after interrupt servicing to process frames queued via netif_rx. */ -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p) +static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 62a9d89dfbe..17d0c0d40b0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; - skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a0ed7f9e80..296cf93a65e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,10 +497,12 @@ static inline void *netdev_priv(struct net_device *dev) #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - int (*func) (struct sk_buff *, struct net_device *, - struct packet_type *); + __be16 type; /* This is really htons(ether_type). */ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, + struct net_device *); void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f02e9824..60b32151f76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -164,7 +164,6 @@ struct skb_shared_info { * @stamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on - * @real_dev: The real device we are using * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -206,7 +205,6 @@ struct sk_buff { struct timeval stamp; struct net_device *dev; struct net_device *input_dev; - struct net_device *real_dev; union { struct tcphdr *th; diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a5..a13e30c35f4 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 3696f988a9f..926eed54302 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb..deb7ca75db4 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143..2570b536c8f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b..533fc074ed9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e..71769a5aeef 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c358..223f8fa9ffc 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,8 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); #endif diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b3..b2e01cc3fc8 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e..fee62ff8c19 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); +extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); diff --git a/net/802/p8022.c b/net/802/p8022.c index 5ae63416df6..b24817c63ca 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, struct datalink_proto *register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct datalink_proto *proto; diff --git a/net/802/psnap.c b/net/802/psnap.c index 1053821ddf9..ab80b1fab53 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc) * A SNAP packet has arrived */ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; struct datalink_proto *proto; @@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, /* Pass the frame on. */ skb->h.raw += 5; skb_pull(skb, 5); - rc = proto->rcvfunc(skb, dev, &snap_packet_type); + rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } else { skb->sk = NULL; kfree_skb(skb); @@ -118,7 +118,8 @@ module_exit(snap_exit); struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *)) + struct packet_type *, + struct net_device *)) { struct datalink_proto *proto = NULL; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 508b1fa1454..9ae3a14dd01 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev, /* found in vlan_dev.c */ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype); + struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 49c48741351..145f5cde96c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) * */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype) + struct packet_type* ptype, struct net_device *orig_dev) { unsigned char *rawp = NULL; struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c34614ea5fc..7076097debc 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, * frame. We currently only support Ethernet. */ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct elapaarp *ea = aarp_hdr(skb); int hash, ret = 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 192b529f86a..ffde33cd09b 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1390,7 +1390,7 @@ free_it: * [ie ARPHRD_ETHERTALK] */ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct ddpehdr *ddp; struct sock *sock; @@ -1482,7 +1482,7 @@ freeit: * header and append a long one. */ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { /* Expand any short form frames */ if (skb->mac.raw[2] == 1) { @@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, } skb->h.raw = skb->data; - return atalk_rcv(skb, dev, pt); + return atalk_rcv(skb, dev, pt, orig_dev); freeit: kfree_skb(skb); return 0; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 3dc808fde33..124eec8216d 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -132,7 +132,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ + ip_rcv(skb, skb->dev, NULL, skb->dev); /* Wrong ptype */ return 1; } #endif @@ -258,7 +258,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ + ip_rcv(skb, dev, ptype, dev); /* Note ptype here is the wrong one, fix me later */ break; case AX25_P_ARP: @@ -268,7 +268,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_ARP); - arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ + arp_rcv(skb, dev, ptype, dev); /* Note ptype here is wrong... */ break; #endif case AX25_P_TEXT: @@ -454,7 +454,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, * Receive an AX.25 frame via a SLIP interface. */ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ diff --git a/net/core/dev.c b/net/core/dev.c index faf59b02c4b..e1cc162bf29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1058,7 +1058,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); @@ -1425,14 +1425,14 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static __inline__ void skb_bond(struct sk_buff *skb) +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) { - skb->real_dev = skb->dev; + if (dev->master) skb->dev = dev->master; - } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1482,10 +1482,11 @@ static void net_tx_action(struct softirq_action *h) } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1496,7 +1497,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret) + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; @@ -1505,14 +1507,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb, return 0; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev); + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); *pt_prev = NULL; } return br_handle_frame_hook(port, pskb); } #else -#define handle_bridge(skb, pt_prev, ret) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1559,6 +1561,7 @@ static int ing_filter(struct sk_buff *skb) int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; @@ -1569,7 +1572,7 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->stamp.tv_sec) net_timestamp(&skb->stamp); - skb_bond(skb); + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; @@ -1590,14 +1593,14 @@ int netif_receive_skb(struct sk_buff *skb) list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; /* noone else should process this after*/ } else { skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -1616,7 +1619,7 @@ ncls: handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1624,13 +1627,13 @@ ncls: if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16df7bd77e7..ef498cb9f78 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -333,7 +333,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->sk = NULL; C(stamp); C(dev); - C(real_dev); C(h); C(nh); C(mac); @@ -397,7 +396,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->sk = NULL; new->dev = old->dev; - new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0c30409fe9e..bd49dd97a09 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2064,7 +2064,7 @@ static struct notifier_block dn_dev_notifier = { .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { .type = __constant_htons(ETH_P_DNA_RT), diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2399fa8a3f8..2c915f305be 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb) return NET_RX_SUCCESS; } -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dn_skb_cb *cb; unsigned char flags = 0; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index b807a314269..8f063990555 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1009,7 +1009,7 @@ release: * Receive an Econet frame from a device. */ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a642fd61285..6eb9c549d64 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip, static void parp_redo(struct sk_buff *skb) { nf_reset(skb); - arp_rcv(skb, skb->dev, NULL); + arp_rcv(skb, skb->dev, NULL, skb->dev); } /* @@ -927,7 +927,7 @@ out: * Receive an arp request from the device layer. */ -int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *arp; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c703528e0bc..d603247bdfe 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,7 +358,7 @@ drop: /* * Main IP Receive routine. */ -int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d2bf8e1930a..63e106605f2 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -393,7 +393,7 @@ static int __init ic_defaults(void) #ifdef IPCONFIG_RARP -static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { .type = __constant_htons(ETH_P_RARP), @@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void) * Process received RARP packet. */ static int __init -ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *rarp; unsigned char *rarp_ptr; @@ -555,7 +555,7 @@ struct bootp_pkt { /* BOOTP packet format */ #define DHCPRELEASE 7 #define DHCPINFORM 8 -static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { .type = __constant_htons(ETH_P_IP), @@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *ext) /* * Receive BOOTP reply. */ -static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct bootp_pkt *b; struct iphdr *h; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 10fbb50daea..ab51c0369e1 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb) return dst_input(skb); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ipv6hdr *hdr; u32 pkt_len; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5a27e5df588..3a13c5d1d4d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1627,7 +1627,7 @@ out: return rc; } -static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { /* NULL here for pt means the packet was looped back */ struct ipx_interface *intrfc; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index eb65b4925b5..3e9a06abbdd 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1303,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, * Jean II */ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct irlap_info info; struct irlap_cb *self; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 6ffaed4544e..634901dd156 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -54,7 +54,7 @@ extern int irsock_init(void); extern void irsock_cleanup(void); /* irlap_frame.c */ extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); /* * Module parameters diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5ff02c080a0..9727455bf0e 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -103,7 +103,8 @@ out: struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct llc_sap *sap = llc_sap_find(lsap); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 4da6976efc9..13b46240b7a 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) * data now), it queues this frame in the connection's backlog. */ int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct llc_sap *sap; struct llc_pdu_sn *pdu; @@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * LLC functionality */ if (sap->rcv_func) { - sap->rcv_func(skb, dev, pt); + sap->rcv_func(skb, dev, pt, orig_dev); goto out; } dest = llc_pdu_type(skb); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 220bf7494f7..83eb41d9b93 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -64,7 +64,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; - ip_rcv(skb, skb->dev, NULL); + ip_rcv(skb, skb->dev, NULL, skb->dev); return 1; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9d5980aa4d..deb5f6f7f85 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -241,7 +241,7 @@ static struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET static struct proto_ops packet_ops_spkt; -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; @@ -441,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned we will not harm anyone. */ -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_ll *sll; @@ -546,7 +546,7 @@ drop: } #ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 36fc3bf6d88..adfe7b8df35 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) } int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct sk_buff *nskb; struct x25_neigh *nb; -- cgit v1.2.3-70-g09d2 From 089af26c706d1473f641c909fee7c878d29c1f1a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:37:23 -0700 Subject: [NETFILTER]: Rename skb_ip_make_writable() to skb_make_writable() There is nothing IPv4-specific in it. In fact, it was already used by IPv6, too... Upcoming nfnetlink_queue code will use it for any kind of packet. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 +++++ include/linux/netfilter_ipv4.h | 5 ----- net/core/netfilter.c | 6 +++--- net/ipv4/netfilter/ip_nat_core.c | 4 ++-- net/ipv4/netfilter/ip_nat_helper.c | 8 ++++---- net/ipv4/netfilter/ip_nat_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 2 +- net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_DSCP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 4 ++-- net/ipv4/netfilter/ipt_TCPMSS.c | 2 +- net/ipv4/netfilter/ipt_TOS.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- 15 files changed, 25 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ec60856408f..54b97a1baba 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -193,6 +193,11 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); /* FIXME: Before cache is ever used, this must be implemented for real. */ extern void nf_invalidate_cache(int pf); +/* Call this before modifying an existing packet: ensures it is + modifiable and linear to the point you care about (writable_len). + Returns true or false. */ +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 552815b8193..fdc4a952734 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -80,11 +80,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); -/* Call this before modifying an existing IP packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. */ -extern int skb_ip_make_writable(struct sk_buff **pskb, - unsigned int writable_len); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 076c156d5ed..bbf9081a680 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -512,8 +512,9 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); +#endif /*CONFIG_INET*/ -int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; @@ -540,8 +541,7 @@ copy_skb: *pskb = nskb; return 1; } -EXPORT_SYMBOL(skb_ip_make_writable); -#endif /*CONFIG_INET*/ +EXPORT_SYMBOL(skb_make_writable); /* Internal logging interface, which relies on the real LOG target modules */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 567c802fecf..1adedb743f6 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -359,7 +359,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct ip_nat_protocol *p; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; @@ -431,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb, struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 158f34f32c0..d2dd5d31355 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, struct tcphdr *tcph; int datalen; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, match_offset + match_len) return 0; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb, optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - if (!skb_ip_make_writable(pskb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); @@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 7ed2fdb5345..93871904399 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb, struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 6113a16af86..1d381bf6857 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -103,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb, if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(pskb, hdroff + hdrsize)) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 689478e637a..c4906e1aa24 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb, u32 oldip, newip; u16 *portptr, newport; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 2a48b6e635a..93b2c5111bb 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb, return NF_DROP; } - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; spin_lock_bh(&snmp_lock); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index bc0af8d8e91..ae975ac59c6 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -388,7 +388,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 975476fef27..6e319570a28 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f63a9bc0e4d..a1319693f64 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -66,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) tcph->cwr == einfo->proto.tcp.cwr))) return 1; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 949288319ca..8db70d6908c 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -58,7 +58,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, unsigned int i; u_int8_t *opt; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; if ((*pskb)->ip_summed == CHECKSUM_HW && diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 49abb7eef0a..deadb36d442 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 83ccedceed1..7130603a32c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -384,7 +384,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; -- cgit v1.2.3-70-g09d2 From 020b4c12dbe3868d792a01d7c1470cd837abe10f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:39:00 -0700 Subject: [NETFILTER]: Move ipv4 specific code from net/core/netfilter.c to net/ipv4/netfilter.c Netfilter cleanup - Move ipv4 code from net/core/netfilter.c to net/ipv4/netfilter.c - Move ipv6 netfilter code from net/ipv6/ip6_output.c to net/ipv6/netfilter.c Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/core/netfilter.c | 72 ---------------------------------------------- net/ipv4/Makefile | 2 +- net/ipv4/netfilter.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/Makefile | 2 +- net/ipv6/ip6_output.c | 32 --------------------- net/ipv6/ipv6_syms.c | 3 -- net/ipv6/netfilter.c | 43 ++++++++++++++++++++++++++++ 7 files changed, 124 insertions(+), 109 deletions(-) create mode 100644 net/ipv4/netfilter.c create mode 100644 net/ipv6/netfilter.c (limited to 'net/ipv6') diff --git a/net/core/netfilter.c b/net/core/netfilter.c index bbf9081a680..9849357f612 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,12 +22,7 @@ #include #include #include -#include -#include -#include #include -#include -#include /* In this code, we can be waiting indefinitely for userspace to * service a packet if a hook returns NF_QUEUE. We could keep a count @@ -447,73 +442,6 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, return; } -#ifdef CONFIG_INET -/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct rtable *rt; - struct flowi fl = {}; - struct dst_entry *odst; - unsigned int hh_len; - - /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause - * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. - */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; -#endif - fl.proto = iph->protocol; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return -1; - } - dst_release(&rt->u.dst); - dst_release(odst); - } - - if ((*pskb)->dst->error) - return -1; - - /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - return 0; -} -EXPORT_SYMBOL(ip_route_me_harder); -#endif /*CONFIG_INET*/ - int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 55dc6cca1e7..61c7386bcd2 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c new file mode 100644 index 00000000000..6594d1c9697 --- /dev/null +++ b/net/ipv4/netfilter.c @@ -0,0 +1,79 @@ +#include + +#ifdef CONFIG_NETFILTER + +/* IPv4 specific functions of netfilter core */ +#include +#include + +#include +#include +#include +#include +#include + +/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ +int ip_route_me_harder(struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt; + struct flowi fl = {}; + struct dst_entry *odst; + unsigned int hh_len; + + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause + * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. + */ + if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + fl.nl_u.ip4_u.daddr = iph->daddr; + fl.nl_u.ip4_u.saddr = iph->saddr; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; +#ifdef CONFIG_IP_ROUTE_FWMARK + fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; +#endif + fl.proto = iph->protocol; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + /* Drop old route. */ + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + /* non-local src, find valid iif to satisfy + * rp-filter when calling ip_route_input. */ + fl.nl_u.ip4_u.daddr = iph->saddr; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + odst = (*pskb)->dst; + if (ip_route_input(*pskb, iph->daddr, iph->saddr, + RT_TOS(iph->tos), rt->u.dst.dev) != 0) { + dst_release(&rt->u.dst); + return -1; + } + dst_release(&rt->u.dst); + dst_release(odst); + } + + if ((*pskb)->dst->error) + return -1; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + + return 0; +} +EXPORT_SYMBOL(ip_route_me_harder); +#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b39e0494059..5bccea2d81b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,7 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o + ip6_flowlabel.o ipv6_syms.o netfilter.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 590d2b79719..a7fcbcc8357 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -153,38 +153,6 @@ int ip6_output(struct sk_buff *skb) return ip6_output2(skb); } -#ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, - .proto = iph->nexthdr, - }; - - dst = ip6_route_output(skb->sk, &fl); - - if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); - dst_release(dst); - return -EINVAL; - } - - /* Drop old route. */ - dst_release(skb->dst); - - skb->dst = dst; - return 0; -} -#endif - /* * xmit an sk_buff (used by TCP) */ diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 5ade5a5d199..37a4a99c9fe 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -#ifdef CONFIG_NETFILTER -EXPORT_SYMBOL(ip6_route_me_harder); -#endif EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c new file mode 100644 index 00000000000..5656d0959ab --- /dev/null +++ b/net/ipv6/netfilter.c @@ -0,0 +1,43 @@ +#include +#include + +#ifdef CONFIG_NETFILTER + +#include +#include +#include +#include +#include + +int ip6_route_me_harder(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct dst_entry *dst; + struct flowi fl = { + .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .nl_u = + { .ip6_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, } }, + .proto = iph->nexthdr, + }; + + dst = ip6_route_output(skb->sk, &fl); + + if (dst->error) { + IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); + dst_release(dst); + return -EINVAL; + } + + /* Drop old route. */ + dst_release(skb->dst); + + skb->dst = dst; + return 0; +} +EXPORT_SYMBOL(ip6_route_me_harder); + +#endif /* CONFIG_NETFILTER */ -- cgit v1.2.3-70-g09d2 From 4fdb3bb723db469717c6d38fda667d8b0fa86ebd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:40:55 -0700 Subject: [NETLINK]: Add properly module refcounting for kernel netlink sockets. - Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- drivers/w1/w1_int.c | 4 +- include/linux/net.h | 3 ++ include/linux/netlink.h | 2 +- kernel/audit.c | 3 +- lib/kobject_uevent.c | 3 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/netfilter/dn_rtmsg.c | 4 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/netfilter/ip_queue.c | 3 +- net/ipv4/netfilter/ipt_ULOG.c | 3 +- net/ipv4/tcp_diag.c | 3 +- net/ipv6/netfilter/ip6_queue.c | 2 +- net/netfilter/nfnetlink.c | 5 +- net/netlink/af_netlink.c | 108 ++++++++++++++++++++++++++++++++-------- net/xfrm/xfrm_user.c | 4 +- security/selinux/netlink.c | 2 +- 17 files changed, 119 insertions(+), 36 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b5a5e04b6d3..8809788dac2 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 23; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL); + dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); @@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_bus_master *bm) EXPORT_SYMBOL(w1_add_master_device); EXPORT_SYMBOL(w1_remove_master_device); + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); diff --git a/include/linux/net.h b/include/linux/net.h index 20cb226b226..39906619b9d 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6552b71bfa7..1c50fea8995 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -117,7 +117,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc2..ed4019563d5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 8e49d21057e..88f4d746aa0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 01af4fcef26..561d75c8ed5 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,7 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b1bb30e638..9b3c61f1a37 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,7 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 284a9998e53..3068fddb2da 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, + THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; @@ -162,6 +163,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); MODULE_AUTHOR("Steven Whitehouse "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); module_init(init); module_exit(fini); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e5722084239..b5e2f1550c9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index ae975ac59c6..b237f7fcad9 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -692,7 +692,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 52a0076302a..4ea8371ab27 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -62,6 +62,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ @@ -372,7 +373,7 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f66945cb158..f79bd11a470 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk, int len) static int __init tcpdiag_init(void) { - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, + THIS_MODULE); if (tcpnl == NULL) return -ENOMEM; return 0; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7130603a32c..1c3d247a22c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -685,7 +685,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b0ed5798184..6210ca42166 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -38,6 +38,8 @@ #include MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; @@ -324,7 +326,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, + THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ff774a06c89..5d487cd69c8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,7 +13,12 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo * use nlk_sk, as sk->protinfo is on a diet 8) - * + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days */ #include @@ -92,6 +97,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + struct proto_ops *p_ops; }; static struct netlink_table *nl_table; @@ -341,7 +347,21 @@ static int netlink_create(struct socket *sock, int protocol) if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - sock->ops = &netlink_ops; + netlink_table_grab(); + if (!nl_table[protocol].hash.entries) { +#ifdef CONFIG_KMOD + /* We do 'best effort'. If we find a matching module, + * it is loaded. If not, we don't return an error to + * allow pure userspace<->userspace communication. -HW + */ + netlink_table_ungrab(); + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_table_grab(); +#endif + } + netlink_table_ungrab(); + + sock->ops = nl_table[protocol].p_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) @@ -394,6 +414,22 @@ static int netlink_release(struct socket *sock) }; notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + + /* When this is a kernel socket, we need to remove the owner pointer, + * since we don't know whether the module will be dying at any given + * point - HW + */ + if (!nlk->pid) { + struct proto_ops *p_tmp; + + netlink_table_grab(); + p_tmp = nl_table[sk->sk_protocol].p_ops; + if (p_tmp != &netlink_ops) { + nl_table[sk->sk_protocol].p_ops = &netlink_ops; + kfree(p_tmp); + } + netlink_table_ungrab(); + } sock_put(sk); return 0; @@ -1023,8 +1059,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) { + struct proto_ops *p_ops; struct socket *sock; struct sock *sk; @@ -1034,22 +1071,63 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) if (unit<0 || unit>=MAX_LINKS) return NULL; + /* Do a quick check, to make us not go down to netlink_insert() + * if protocol already has kernel socket. + */ + sk = netlink_lookup(unit, 0); + if (unlikely(sk)) { + sock_put(sk); + return NULL; + } + if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; + sk = NULL; + if (module) { + /* Every registering protocol implemented in a module needs + * it's own p_ops, since the socket code cannot deal with + * module refcounting otherwise. -HW + */ + p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); + if (!p_ops) + goto out_sock_release; + + memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); + p_ops->owner = module; + } else + p_ops = &netlink_ops; + + netlink_table_grab(); + nl_table[unit].p_ops = p_ops; + netlink_table_ungrab(); + if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; + } + + return sk; + +out_kfree_p_ops: + netlink_table_grab(); + if (nl_table[unit].p_ops != &netlink_ops) { + kfree(nl_table[unit].p_ops); + nl_table[unit].p_ops = &netlink_ops; } + netlink_table_ungrab(); +out_sock_release: + sock_release(sock); return sk; } @@ -1413,6 +1491,8 @@ enomem: for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; + nl_table[i].p_ops = &netlink_ops; + hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) @@ -1438,21 +1518,7 @@ out: return err; } -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; - proto_unregister(&netlink_proto); -} - core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8da3e25b2c4..33ceeea783b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1519,7 +1519,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, + THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; @@ -1537,3 +1538,4 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 18d08acafa7..341dbe2579b 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); + selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); -- cgit v1.2.3-70-g09d2 From 2cc7d5730957c4a3f3659d17d2ba5e06d5581c1f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:42:34 -0700 Subject: [NETFILTER]: Move reroute-after-queue code up to the nf_queue layer. The rerouting functionality is required by the core, therefore it has to be implemented by the core and not in individual queue handlers. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++ include/linux/netfilter_ipv6.h | 3 ++ net/core/netfilter.c | 66 ++++++++++++++++++++++++++++++++++-------- net/ipv4/netfilter.c | 64 ++++++++++++++++++++++++++++++++++++++-- net/ipv4/netfilter/ip_queue.c | 27 ----------------- net/ipv6/af_inet6.c | 7 +++++ net/ipv6/netfilter.c | 62 +++++++++++++++++++++++++++++++++++++++ net/ipv6/netfilter/ip6_queue.c | 24 --------------- 8 files changed, 199 insertions(+), 65 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 54b97a1baba..d163e20ca8d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -198,6 +198,17 @@ extern void nf_invalidate_cache(int pf); Returns true or false. */ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +struct nf_queue_rerouter { + void (*save)(const struct sk_buff *skb, struct nf_info *info); + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); + int rer_size; +}; + +#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) + +extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); +extern int nf_unregister_queue_rerouter(int pf); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 20c069a5e4a..5d204ee7a31 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,4 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 9849357f612..1ed4f311042 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -53,6 +53,9 @@ static struct nf_queue_handler_t { nf_queue_outfn_t outfn; void *data; } queue_handler[NPROTO]; + +static struct nf_queue_rerouter *queue_rerouter; + static DEFINE_RWLOCK(queue_handler_lock); int nf_register_hook(struct nf_hook_ops *reg) @@ -260,11 +263,34 @@ int nf_unregister_queue_handler(int pf) return 0; } +int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + + return 0; +} + +int nf_unregister_queue_rerouter(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + return 0; +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int nf_queue(struct sk_buff *skb, +static int nf_queue(struct sk_buff **skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, @@ -282,17 +308,17 @@ static int nf_queue(struct sk_buff *skb, read_lock(&queue_handler_lock); if (!queue_handler[pf].outfn) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } - info = kmalloc(sizeof(*info), GFP_ATOMIC); + info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - skb); + *skb); read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -311,15 +337,21 @@ static int nf_queue(struct sk_buff *skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = (*skb)->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif + if (queue_rerouter[pf].save) + queue_rerouter[pf].save(*skb, info); + + status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + + if (status >= 0 && queue_rerouter[pf].reroute) + status = queue_rerouter[pf].reroute(skb, info); - status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); read_unlock(&queue_handler_lock); if (status < 0) { @@ -332,9 +364,11 @@ static int nf_queue(struct sk_buff *skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(skb); + kfree_skb(*skb); + return 1; } + return 1; } @@ -365,7 +399,7 @@ next_hook: ret = -EPERM; } else if (verdict == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) goto next_hook; } unlock: @@ -428,7 +462,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, break; case NF_QUEUE: - if (!nf_queue(skb, elem, info->pf, info->hook, + if (!nf_queue(&skb, elem, info->pf, info->hook, info->indev, info->outdev, info->okfn)) goto next_hook; break; @@ -555,6 +589,12 @@ void __init netfilter_init(void) { int i, h; + queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), + GFP_KERNEL); + if (!queue_rerouter) + panic("netfilter: cannot allocate queue rerouter array\n"); + memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); + for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); @@ -573,4 +613,6 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); +EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6594d1c9697..ae0779d82c5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,10 +1,11 @@ -#include +/* IPv4 specific functions of netfilter core */ +#include #ifdef CONFIG_NETFILTER -/* IPv4 specific functions of netfilter core */ #include #include +#include #include #include @@ -76,4 +77,63 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); + +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. + */ + +struct ip_rt_info { + u_int32_t daddr; + u_int32_t saddr; + u_int8_t tos; +}; + +static void queue_save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + const struct iphdr *iph = skb->nh.iph; + + rt_info->tos = iph->tos; + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + const struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + struct iphdr *iph = (*pskb)->nh.iph; + + if (!(iph->tos == rt_info->tos + && iph->daddr == rt_info->daddr + && iph->saddr == rt_info->saddr)) + return ip_route_me_harder(pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip_reroute = { + .rer_size = sizeof(struct ip_rt_info), + .save = queue_save, + .reroute = queue_reroute, +}; + +static int init(void) +{ + return nf_register_queue_rerouter(PF_INET, &ip_reroute); +} + +static void fini(void) +{ + nf_unregister_queue_rerouter(PF_INET); +} + +module_init(init); +module_exit(fini); + #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index b237f7fcad9..78892980f42 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -43,17 +43,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" -struct ipq_rt_info { - __u8 tos; - __u32 daddr; - __u32 saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -305,14 +298,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = skb->nh.iph; - - entry->rt_info.tos = iph->tos; - entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -393,18 +378,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = e->skb->nh.iph; - - if (!(iph->tos == e->rt_info.tos - && iph->daddr == e->rt_info.daddr - && iph->saddr == e->rt_info.saddr)) - return ip_route_me_harder(&e->skb); - } return 0; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 28d9bcab097..57404735362 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -757,6 +758,9 @@ static int __init inet6_init(void) err = igmp6_init(&inet6_family_ops); if (err) goto igmp_fail; + err = ipv6_netfilter_init(); + if (err) + goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -813,6 +817,8 @@ proc_tcp6_fail: raw6_proc_exit(); proc_raw6_fail: #endif + ipv6_netfilter_fini(); +netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); @@ -852,6 +858,7 @@ static void __exit inet6_exit(void) ip6_route_cleanup(); ipv6_packet_cleanup(); igmp6_cleanup(); + ipv6_netfilter_fini(); ndisc_cleanup(); icmpv6_cleanup(); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 5656d0959ab..c8daef97cf5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include #include @@ -40,4 +42,64 @@ int ip6_route_me_harder(struct sk_buff *skb) } EXPORT_SYMBOL(ip6_route_me_harder); +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. + */ + +struct ip6_rt_info { + struct in6_addr daddr; + struct in6_addr saddr; +}; + +static void save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = skb->nh.ipv6h; + + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = (*pskb)->nh.ipv6h; + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + return ip6_route_me_harder(*pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip6_reroute = { + .rer_size = sizeof(struct ip6_rt_info), + .save = &save, + .reroute = &reroute, +}; + +int __init ipv6_netfilter_init(void) +{ + return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); +} + +void ipv6_netfilter_fini(void) +{ + nf_unregister_queue_rerouter(PF_INET6); +} + +#else /* CONFIG_NETFILTER */ +int __init ipv6_netfilter_init(void) +{ + return 0; +} + +void ipv6_netfilter_fini(void) +{ +} #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 1c3d247a22c..c45d8f8815d 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -47,16 +47,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -302,13 +296,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = skb->nh.ipv6h; - - entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -389,17 +376,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - * Not a nice way to cmp, but works - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = e->skb->nh.ipv6h; - if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || - !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) - return ip6_route_me_harder(e->skb); - } return 0; } -- cgit v1.2.3-70-g09d2 From 0ab43f84995f2c2fcc5cc58a9accaa1095e1317f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:43:44 -0700 Subject: [NETFILTER]: Core changes required by upcoming nfnetlink_queue code - split netfiler verdict in 16bit verdict and 16bit queue number - add 'queuenum' argument to nf_queue_outfn_t and its users ip[6]_queue - move NFNL_SUBSYS_ definitions from enum to #define - introduce autoloading for nfnetlink subsystem modules - add MODULE_ALIAS_NFNL_SUBSYS macro - add nf_unregister_queue_handlers() to register all handlers for a given nf_queue_outfn_t - add more verbose DEBUGP macro definition to nfnetlink.c - make nfnetlink_subsys_register fail if subsys already exists - add some more comments and debug statements to nfnetlink.c Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 ++++++++++++- include/linux/netfilter/nfnetlink.h | 20 ++++++++++--------- net/core/netfilter.c | 40 ++++++++++++++++++++++++++++++------- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 28 ++++++++++++++++++++------ 6 files changed, 83 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d163e20ca8d..711e05f33d6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,6 +21,16 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* we overload the higher bits for encoding auxiliary data such as the queue + * number. Not nice, but better than additional function arguments. */ +#define NF_VERDICT_MASK 0x0000ffff +#define NF_VERDICT_BITS 16 + +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. @@ -179,10 +189,12 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, /* Packet queuing */ typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, void *data); + struct nf_info *info, + unsigned int queuenum, void *data); extern int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data); extern int nf_unregister_queue_handler(int pf); +extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ace7a7be074..561f9df2880 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -69,15 +69,14 @@ struct nfgenmsg { #define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) #define NFNL_MSG_TYPE(x) (x & 0x00ff) -enum nfnl_subsys_id { - NFNL_SUBSYS_NONE = 0, - NFNL_SUBSYS_CTNETLINK, - NFNL_SUBSYS_CTNETLINK_EXP, - NFNL_SUBSYS_IPTNETLINK, - NFNL_SUBSYS_QUEUE, - NFNL_SUBSYS_ULOG, - NFNL_SUBSYS_COUNT, -}; +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_COUNT 5 #ifdef __KERNEL__ @@ -142,5 +141,8 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ + MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) + #endif /* __KERNEL__ */ #endif /* _NFNETLINK_H */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 1ed4f311042..3e38084ac2b 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -221,7 +221,8 @@ static unsigned int nf_iterate(struct list_head *head, verdict = elem->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG - if (unlikely(verdict > NF_MAX_VERDICT)) { + if (unlikely((verdict & NF_VERDICT_MASK) + > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); continue; @@ -239,6 +240,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) { int ret; + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); if (queue_handler[pf].outfn) ret = -EBUSY; @@ -255,6 +259,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -286,6 +293,20 @@ int nf_unregister_queue_rerouter(int pf) return 0; } +void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +{ + int pf; + + write_lock_bh(&queue_handler_lock); + for (pf = 0; pf < NPROTO; pf++) { + if (queue_handler[pf].outfn == outfn) { + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + } + } + write_unlock_bh(&queue_handler_lock); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). @@ -295,7 +316,8 @@ static int nf_queue(struct sk_buff **skb, int pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -347,7 +369,8 @@ static int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + status = queue_handler[pf].outfn(*skb, info, queuenum, + queue_handler[pf].data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -397,9 +420,10 @@ next_hook: } else if (verdict == NF_DROP) { kfree_skb(*pskb); ret = -EPERM; - } else if (verdict == NF_QUEUE) { + } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; } unlock: @@ -456,14 +480,15 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, info->okfn, INT_MIN); } - switch (verdict) { + switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: info->okfn(skb); break; case NF_QUEUE: if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn)) + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; } @@ -613,6 +638,7 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 78892980f42..cfc886f382a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -280,7 +280,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c45d8f8815d..5af4cee93d9 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -278,7 +278,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 6210ca42166..30b25f47f7c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,7 +44,9 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; #if 0 -#define DEBUGP printk +#define DEBUGP(format, args...) \ + printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ + __LINE__, __FUNCTION__, ## args) #else #define DEBUGP(format, args...) #endif @@ -67,11 +69,11 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) { DEBUGP("registering subsystem ID %u\n", n->subsys_id); - /* If the netlink socket wasn't created, then fail */ - if (!nfnl) - return -1; - nfnl_lock(); + if (subsys_table[n->subsys_id]) { + nfnl_unlock(); + return -EBUSY; + } subsys_table[n->subsys_id] = n; nfnl_unlock(); @@ -227,8 +229,18 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, type = nlh->nlmsg_type; ss = nfnetlink_get_subsys(type); - if (!ss) + if (!ss) { +#ifdef CONFIG_KMOD + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); + if (!ss) +#endif goto err_inval; + } nc = nfnetlink_find_client(type, ss); if (!nc) { @@ -252,12 +264,14 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, if (err < 0) goto err_inval; + DEBUGP("calling handler\n"); err = nc->call(nfnl, skb, nlh, cda, errp); *errp = err; return err; } err_inval: + DEBUGP("returning -EINVAL\n"); *errp = -EINVAL; return -1; } @@ -311,6 +325,8 @@ static void nfnetlink_rcv(struct sock *sk, int len) kfree_skb(skb); } + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ up(&nfnl_sem); } while(nfnl && nfnl->sk_receive_queue.qlen); } -- cgit v1.2.3-70-g09d2 From 7af4cc3fa158ff1dda6e7451c7e6afa6b0bb85cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:44:15 -0700 Subject: [NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink - Add new nfnetlink_queue module - Add new ipt_NFQUEUE and ip6t_NFQUEUE modules to access queue numbers 1-65535 - Mark ip_queue and ip6_queue Kconfig options as OBSOLETE - Update feature-removal-schedule to remove ip[6]_queue in December Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- Documentation/feature-removal-schedule.txt | 12 + include/linux/netfilter/nfnetlink_queue.h | 85 +++ include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 + net/ipv4/netfilter/Kconfig | 6 +- net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_NFQUEUE.c | 70 +++ net/ipv6/netfilter/Kconfig | 11 +- net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NFQUEUE.c | 70 +++ net/netfilter/Kconfig | 8 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink_queue.c | 877 +++++++++++++++++++++++++++++ 12 files changed, 1153 insertions(+), 5 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_queue.h create mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h create mode 100644 net/ipv4/netfilter/ipt_NFQUEUE.c create mode 100644 net/ipv6/netfilter/ip6t_NFQUEUE.c create mode 100644 net/netfilter/nfnetlink_queue.c (limited to 'net/ipv6') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8b1430b4665..0665cb12bd6 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -135,3 +135,15 @@ Why: With the 16-bit PCMCIA subsystem now behaving (almost) like a pcmciautils package available at http://kernel.org/pub/linux/utils/kernel/pcmcia/ Who: Dominik Brodowski + +--------------------------- + +What: ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue) +When: December 2005 +Why: This interface has been obsoleted by the new layer3-independent + "nfnetlink_queue". The Kernel interface is compatible, so the old + ip[6]tables "QUEUE" targets still work and will transparently handle + all packets into nfnetlink queue number 0. Userspace users will have + to link against API-compatible library on top of libnfnetlink_queue + instead of the current 'libipq'. +Who: Harald Welte diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 index 00000000000..edb463a87eb --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_QUEUE_H +#define _NFNETLINK_QUEUE_H + +#include + +enum nfqnl_msg_types { + NFQNL_MSG_PACKET, /* packet from kernel to userspace */ + NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ + NFQNL_MSG_CONFIG, /* connect to a particular queue */ + + NFQNL_MSG_MAX +}; + +struct nfqnl_msg_packet_hdr { + u_int32_t packet_id; /* unique ID of packet in queue */ + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ + NFQA_MARK, /* u_int32_t nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ + NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + + __NFQA_MAX +}; +#define NFQA_MAX (__NFQA_MAX - 1) + +struct nfqnl_msg_verdict_hdr { + u_int32_t verdict; + u_int32_t id; +} __attribute__ ((packed)); + + +enum nfqnl_msg_config_cmds { + NFQNL_CFG_CMD_NONE, + NFQNL_CFG_CMD_BIND, + NFQNL_CFG_CMD_UNBIND, + NFQNL_CFG_CMD_PF_BIND, + NFQNL_CFG_CMD_PF_UNBIND, +}; + +struct nfqnl_msg_config_cmd { + u_int8_t command; /* nfqnl_msg_config_cmds */ + u_int8_t _pad; + u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ +} __attribute__ ((packed)); + +enum nfqnl_config_mode { + NFQNL_COPY_NONE, + NFQNL_COPY_META, + NFQNL_COPY_PACKET, +}; + +struct nfqnl_msg_config_params { + u_int32_t copy_range; + u_int8_t copy_mode; /* enum nfqnl_config_mode */ +} __attribute__ ((packed)); + + +enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ + __NFQA_CFG_MAX +}; + +#endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 index 00000000000..b5b2943b0c6 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h @@ -0,0 +1,16 @@ +/* iptables module for using NFQUEUE mechanism + * + * (C) 2005 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * +*/ +#ifndef _IPT_NFQ_TARGET_H +#define _IPT_NFQ_TARGET_H + +/* target info */ +struct ipt_NFQ_info { + u_int16_t queuenum; +}; + +#endif /* _IPT_DSCP_TARGET_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e47ba39eb65..2fa26a41fa4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -110,11 +110,15 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. config IP_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help Netfilter has the ability to queue packets to user space: the netlink device can be used to access them using this driver. + This option enables the old IPv4-only "ip_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). + To compile it as a module, choose M here. If unsure, say N. config IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index abf2a7d1a58..c2ae663b723 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -91,3 +91,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c new file mode 100644 index 00000000000..3cedc9be880 --- /dev/null +++ b/net/ipv4/netfilter/ipt_NFQUEUE.c @@ -0,0 +1,70 @@ +/* iptables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 77ec704c9ee..cd1551983c6 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK #fi config IP6_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- This option adds a queue handler to the kernel for IPv6 - packets which lets us to receive the filtered packets - with QUEUE target using libiptc as we can do with - the IPv4 now. + packets which enables users to receive the filtered packets + with QUEUE target using libipq. + + THis option enables the old IPv6-only "ip6_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). (C) Fernando Anton 2001 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2e51714953b..847651dbcd2 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c new file mode 100644 index 00000000000..c6e3730e740 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c @@ -0,0 +1,70 @@ +/* ip6tables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("ip6tables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ip6t_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3629d3d1776..f0eb23e5c5f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -3,3 +3,11 @@ config NETFILTER_NETLINK help If this option is enabled, the kernel will include support for the new netfilter netlink interface. + +config NETFILTER_NETLINK_QUEUE + tristate "Netfilter NFQUEUE over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option isenabled, the kernel will include support + for queueing packets via NFNETLINK. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 02e67d37194..14a0b187e75 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c new file mode 100644 index 00000000000..24032610c42 --- /dev/null +++ b/net/netfilter/nfnetlink_queue.c @@ -0,0 +1,877 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFQNL_QMAX_DEFAULT 1024 + +#if 0 +#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define QDEBUG(x, ...) +#endif + +struct nfqnl_queue_entry { + struct list_head list; + struct nf_info *info; + struct sk_buff *skb; + unsigned int id; +}; + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_total; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + atomic_t id_sequence; /* 'sequence' of pkt ids */ + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + + spinlock_t lock; + + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); + +static DEFINE_RWLOCK(instances_lock); + +u_int64_t htonll(u_int64_t in) +{ + u_int64_t out; + int i; + + for (i = 0; i < sizeof(u_int64_t); i++) + ((u_int8_t *)&out)[sizeof(u_int64_t)-1] = ((u_int8_t *)&in)[i]; + + return out; +} + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +__instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct nfqnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(queue_num); + read_unlock_bh(&instances_lock); + + return inst; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + + QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(queue_num)) { + inst = NULL; + QDEBUG("aborting, instance already exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + atomic_set(&inst->id_sequence, 0); + inst->lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(queue_num)]); + + write_unlock_bh(&instances_lock); + + QDEBUG("successfully created new instance\n"); + + return inst; + +out_free: + kfree(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); + +static void +_instance_destroy2(struct nfqnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + QDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->queue_num); + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending skbs from the queue */ + nfqnl_flush(inst, NF_DROP); + + /* and finally free the data structure */ + kfree(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + + + +static void +issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +{ + QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); + + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + + kfree(entry); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry) +{ + list_add(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +/* + * Find and return a queued entry matched by cmpfn, or return the last + * entry if cmpfn is NULL. + */ +static inline struct nfqnl_queue_entry * +__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data) +{ + struct list_head *p; + + list_for_each_prev(p, &queue->queue_list) { + struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; + + if (!cmpfn || cmpfn(entry, data)) + return entry; + } + return NULL; +} + +static inline void +__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) +{ + list_del(&entry->list); + q->queue_total--; +} + +static inline struct nfqnl_queue_entry * +__find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + entry = __find_entry(queue, cmpfn, data); + if (entry == NULL) + return NULL; + + __dequeue_entry(queue, entry); + return entry; +} + + +static inline void +__nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + struct nfqnl_queue_entry *entry; + + while ((entry = __find_dequeue_entry(queue, NULL, 0))) + issue_verdict(entry, verdict); +} + +static inline int +__nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nfattr which has 16bit nfa_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + return status; +} + +static struct nfqnl_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + spin_lock_bh(&queue->lock); + entry = __find_dequeue_entry(queue, cmpfn, data); + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + spin_lock_bh(&queue->lock); + __nfqnl_flush(queue, verdict); + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry, int *errp) +{ + unsigned char *old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nfqnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int tmp_uint; + + QDEBUG("entered\n"); + + /* all macros expand to constant values at compile time */ + size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + spin_lock_bh(&queue->lock); + + switch (queue->copy_mode) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + data_len = 0; + break; + + case NFQNL_COPY_PACKET: + if (queue->copy_range == 0 + || queue->copy_range > entry->skb->len) + data_len = entry->skb->len; + else + data_len = queue->copy_range; + + size += NLMSG_SPACE(data_len); + break; + + default: + *errp = -EINVAL; + spin_unlock_bh(&queue->lock); + return NULL; + } + + spin_unlock_bh(&queue->lock); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail= skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->info->pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(queue->queue_num); + + pmsg.packet_id = htonl(entry->id); + pmsg.hw_protocol = htons(entry->skb->protocol); + pmsg.hook = entry->info->hook; + + NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + + if (entry->info->indev) { + tmp_uint = htonl(entry->info->indev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->info->outdev) { + tmp_uint = htonl(entry->info->outdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->skb->nfmark) { + tmp_uint = htonl(entry->skb->nfmark); + NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + } + + if (entry->info->indev && entry->skb->dev + && entry->skb->dev->hard_header_parse) { + struct nfqnl_msg_packet_hw phw; + + phw.hw_addrlen = + entry->skb->dev->hard_header_parse(entry->skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } + + if (entry->skb->stamp.tv_sec) { + struct nfqnl_msg_packet_timestamp ts; + + ts.sec = htonll(entry->skb->stamp.tv_sec); + ts.usec = htonll(entry->skb->stamp.tv_usec); + + NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = NFQA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nfattr_failure: + if (skb) + kfree_skb(skb); + *errp = -EINVAL; + if (net_ratelimit()) + printk(KERN_ERR "nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) +{ + int status = -EINVAL; + struct sk_buff *nskb; + struct nfqnl_instance *queue; + struct nfqnl_queue_entry *entry; + + QDEBUG("entered\n"); + + queue = instance_lookup(queuenum); + if (!queue) { + QDEBUG("no queue instance matching\n"); + return -EINVAL; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + QDEBUG("mode COPY_NONE, aborting\n"); + return -EAGAIN; + } + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) { + if (net_ratelimit()) + printk(KERN_ERR + "nf_queue: OOM in nfqnl_enqueue_packet()\n"); + return -ENOMEM; + } + + entry->info = info; + entry->skb = skb; + entry->id = atomic_inc_return(&queue->id_sequence); + + nskb = nfqnl_build_packet_message(queue, entry, &status); + if (nskb == NULL) + goto err_out_free; + + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) + goto err_out_free_nskb; + + if (queue->queue_total >= queue->queue_maxlen) { + queue->queue_dropped++; + status = -ENOSPC; + if (net_ratelimit()) + printk(KERN_WARNING "ip_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", + queue->queue_total, queue->queue_dropped); + goto err_out_free_nskb; + } + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + if (status < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return status; + +err_out_free_nskb: + kfree_skb(nskb); + +err_out_unlock: + spin_unlock_bh(&queue->lock); + +err_out_free: + kfree(entry); + return status; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +{ + int diff; + + diff = data_len - e->skb->len; + if (diff < 0) + skb_trim(e->skb, data_len); + else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "ip_queue: OOM " + "in mangle, dropping packet\n"); + return -ENOMEM; + } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(&e->skb, data_len)) + return -ENOMEM; + memcpy(e->skb->data, data, data_len); + + return 0; +} + +static inline int +id_cmp(struct nfqnl_queue_entry *e, unsigned long id) +{ + return (id == e->id); +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status; + + spin_lock_bh(&queue->lock); + status = __nfqnl_set_mode(queue, mode, range); + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +{ + if (entry->info->indev) + if (entry->info->indev->ifindex == ifindex) + return 1; + + if (entry->info->outdev) + if (entry->info->outdev->ifindex == ifindex) + return 1; + + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + QDEBUG("entering for ifindex %u\n", ifindex); + + /* this only looks like we have to hold the readlock for a way too long + * time, issue_verdict(), nf_reinject(), ... - but we always only + * issue NF_DROP, which is processed directly in nf_reinject() */ + read_lock_bh(&instances_lock); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry(inst, tmp, head, hlist) { + struct nfqnl_queue_entry *entry; + while ((entry = find_dequeue_entry(inst, dev_cmp, + ifindex)) != NULL) + issue_verdict(entry, NF_DROP); + } + } + + read_unlock_bh(&instances_lock); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + unsigned int verdict; + struct nfqnl_queue_entry *entry; + + queue = instance_lookup(queue_num); + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + if (!nfqa[NFQA_VERDICT_HDR-1]) + return -EINVAL; + + vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + verdict = ntohl(vhdr->verdict); + + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + return -EINVAL; + + entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + if (nfqa[NFQA_PAYLOAD-1]) { + if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), + NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + verdict = NF_DROP; + } + + if (nfqa[NFQA_MARK-1]) + skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); + + issue_verdict(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + + QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + queue = instance_lookup(queue_num); + if (nfqa[NFQA_CFG_CMD-1]) { + struct nfqnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + QDEBUG("found CFG_CMD\n"); + + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) + return -EBUSY; + + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (!queue) + return -EINVAL; + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + instance_destroy(queue); + break; + case NFQNL_CFG_CMD_PF_BIND: + QDEBUG("registering queue handler for pf=%u\n", + ntohs(cmd->pf)); + return nf_register_queue_handler(ntohs(cmd->pf), + nfqnl_enqueue_packet, + NULL); + + break; + case NFQNL_CFG_CMD_PF_UNBIND: + QDEBUG("unregistering queue handler for pf=%u\n", + ntohs(cmd->pf)); + /* This is a bug and a feature. We can unregister + * other handlers(!) */ + return nf_unregister_queue_handler(ntohs(cmd->pf)); + break; + default: + return -EINVAL; + } + } else { + if (!queue) { + QDEBUG("no config command, and no instance ENOENT\n"); + return -ENOENT; + } + + if (queue->peer_pid != NETLINK_CB(skb).pid) { + QDEBUG("no config command, and wrong pid\n"); + return -EPERM; + } + } + + if (nfqa[NFQA_CFG_PARAMS-1]) { + struct nfqnl_msg_config_params *params; + params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + return 0; +} + +static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .attr_count = NFQA_MAX, + .cb = nfqnl_cb, +}; + +static int +init_or_cleanup(int init) +{ + int status = -ENOMEM; + + if (!init) + goto cleanup; + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +cleanup: + nf_unregister_queue_handlers(nfqnl_enqueue_packet); + unregister_netdevice_notifier(&nfqnl_dev_notifier); + nfnetlink_subsys_unregister(&nfqnl_subsys); + +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(init); +module_exit(fini); -- cgit v1.2.3-70-g09d2 From 0bd1b59b15e4057101c89d4db15a3683c0d897f7 Mon Sep 17 00:00:00 2001 From: Andrew McDonald Date: Tue, 9 Aug 2005 19:44:42 -0700 Subject: [IPV6]: Check interface bindings on IPv6 raw socket reception Take account of whether a socket is bound to a particular device when selecting an IPv6 raw socket to receive a packet. Also perform this check when receiving IPv6 packets with router alert options. Signed-off-by: Andrew McDonald Signed-off-by: David S. Miller --- include/net/rawv6.h | 3 ++- net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 4 +++- net/ipv6/raw.c | 11 ++++++++--- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6a221..887009aa1f8 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -10,7 +10,8 @@ extern rwlock_t raw_v6_lock; extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr); + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif); extern int rawv6_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ff3ec9822e3..ee9f1d36346 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -551,7 +551,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { + while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, + skb->dev->ifindex))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a7fcbcc8357..00f85148b85 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -277,7 +277,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; - if (sk && ra->sel == sel) { + if (sk && ra->sel == sel && + (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == skb->dev->ifindex)) { if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1d4d75b34d3..9db0de81f07 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -81,7 +81,8 @@ static void raw_v6_unhash(struct sock *sk) /* Grumble... icmp and ip_input want to get at this... */ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr) + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -94,6 +95,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, !ipv6_addr_equal(&np->daddr, rmt_addr)) continue; + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + continue; + if (!ipv6_addr_any(&np->rcv_saddr)) { if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) goto found; @@ -160,7 +164,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); + sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); while (sk) { if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { @@ -170,7 +174,8 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (clone) rawv6_rcv(sk, clone); } - sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr); + sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, + skb->dev->ifindex); } out: read_unlock(&raw_v6_lock); -- cgit v1.2.3-70-g09d2 From d13964f4490157b8a290903362bfbc54f750a6bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:45:02 -0700 Subject: [IPV4/6]: Check if packet was actually delivered to a raw socket to decide whether to send an ICMP unreachable Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- include/net/rawv6.h | 2 +- net/ipv4/ip_input.c | 4 ++-- net/ipv4/raw.c | 5 ++++- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/raw.c | 5 ++++- 6 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c45587..1c4bc3e6809 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,6 +37,6 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif); -extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); #endif /* _RAW_H */ diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 887009aa1f8..14476a71725 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -7,7 +7,7 @@ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; -extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); +extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d603247bdfe..81e18023dc1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -225,8 +225,8 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) /* If there maybe a raw socket we must check - if not we * don't care less */ - if (raw_sk) - raw_v4_input(skb, skb->nh.iph, hash); + if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash)) + raw_sk = NULL; if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d1835b1bc8c..e222c5c26b3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -150,10 +150,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ -void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; + int delivered = 0; read_lock(&raw_v4_lock); head = &raw_v4_htable[hash]; @@ -164,6 +165,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) skb->dev->ifindex); while (sk) { + delivered = 1; if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); @@ -177,6 +179,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) } out: read_unlock(&raw_v4_lock); + return delivered; } void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index ab51c0369e1..6e348042693 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -166,8 +166,8 @@ resubmit: nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); - if (raw_sk) - ipv6_raw_deliver(skb, nexthdr); + if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) + raw_sk = NULL; hash = nexthdr & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 9db0de81f07..a082646e6f1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -141,11 +141,12 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) * * Caller owns SKB so we must make clones. */ -void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) +int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct in6_addr *saddr; struct in6_addr *daddr; struct sock *sk; + int delivered = 0; __u8 hash; saddr = &skb->nh.ipv6h->saddr; @@ -167,6 +168,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); while (sk) { + delivered = 1; if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); @@ -179,6 +181,7 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } out: read_unlock(&raw_v6_lock); + return delivered; } /* This cleans up af_inet6 a bit. -DaveM */ -- cgit v1.2.3-70-g09d2 From e6848976b721eeb5551cd94673faafeef78d9f35 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:45:38 -0700 Subject: [NET]: Cleanup INET_REFCNT_DEBUG code Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_common.h | 1 - include/net/ipv6.h | 1 - include/net/sock.h | 32 +++++++++++++++++++++++++++++++- include/net/tcp.h | 2 +- net/core/sock.c | 6 +----- net/ipv4/af_inet.c | 18 ++---------------- net/ipv4/tcp.c | 7 +------ net/ipv4/tcp_minisocks.c | 20 ++++++++++++++++---- net/ipv6/af_inet6.c | 31 +++++++++++-------------------- net/ipv6/ipv6_sockglue.c | 15 ++++++++++++--- net/ipv6/tcp_ipv6.c | 18 +++++++++--------- net/sctp/ipv6.c | 5 +---- net/sctp/protocol.c | 4 +--- 13 files changed, 86 insertions(+), 74 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d140d..1fbd94d8a31 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -29,7 +29,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p extern int inet_listen(struct socket *sock, int backlog); extern void inet_sock_destruct(struct sock *sk); -extern atomic_t inet_sock_nr; extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 533fc074ed9..c5a02ddc594 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -145,7 +145,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) -extern atomic_t inet6_sock_nr; int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); diff --git a/include/net/sock.h b/include/net/sock.h index e9b1dbab90d..11b81551041 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -491,6 +491,9 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +/* Here is the right place to enable sock refcounting debugging */ +#define SOCK_REFCNT_DEBUG + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -561,7 +564,9 @@ struct proto { char name[32]; struct list_head node; - +#ifdef SOCK_REFCNT_DEBUG + atomic_t socks; +#endif struct { int inuse; u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; @@ -571,6 +576,31 @@ struct proto { extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); +#ifdef SOCK_REFCNT_DEBUG +static inline void sk_refcnt_debug_inc(struct sock *sk) +{ + atomic_inc(&sk->sk_prot->socks); +} + +static inline void sk_refcnt_debug_dec(struct sock *sk) +{ + atomic_dec(&sk->sk_prot->socks); + printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); +} + +static inline void sk_refcnt_debug_release(const struct sock *sk) +{ + if (atomic_read(&sk->sk_refcnt) != 1) + printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); +} +#else /* SOCK_REFCNT_DEBUG */ +#define sk_refcnt_debug_inc(sk) do { } while (0) +#define sk_refcnt_debug_dec(sk) do { } while (0) +#define sk_refcnt_debug_release(sk) do { } while (0) +#endif /* SOCK_REFCNT_DEBUG */ + /* Called with local bh disabled */ static __inline__ void sock_prot_inc_use(struct proto *prot) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 5010f0c5a56..31984733777 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -306,7 +306,7 @@ extern kmem_cache_t *tcp_timewait_cachep; static inline void tcp_tw_put(struct tcp_tw_bucket *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef INET_REFCNT_DEBUG +#ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "tw_bucket %p released\n", tw); #endif kmem_cache_free(tcp_timewait_cachep, tw); diff --git a/net/core/sock.c b/net/core/sock.c index 51a5e7ddee8..a1a23be10aa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1367,11 +1367,7 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) - printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", - sk, atomic_read(&sk->sk_refcnt)); -#endif + sk_refcnt_debug_release(sk); sock_put(sk); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 163ae4068b5..9e83d7773d8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -114,10 +114,6 @@ DEFINE_SNMP_STAT(struct linux_mib, net_statistics); -#ifdef INET_REFCNT_DEBUG -atomic_t inet_sock_nr; -#endif - extern void ip_mc_drop_socket(struct sock *sk); /* The inetsw table contains everything that inet_create needs to @@ -153,11 +149,7 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); dst_release(sk->sk_dst_cache); -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet_sock_nr); - printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", - sk, atomic_read(&inet_sock_nr)); -#endif + sk_refcnt_debug_dec(sk); } /* @@ -317,9 +309,7 @@ static int inet_create(struct socket *sock, int protocol) inet->mc_index = 0; inet->mc_list = NULL; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(sk); if (inet->num) { /* It assumes that any protocol which allows @@ -1205,7 +1195,3 @@ EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); EXPORT_SYMBOL(net_statistics); EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); - -#ifdef INET_REFCNT_DEBUG -EXPORT_SYMBOL(inet_sock_nr); -#endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 42a2e2ccd43..20159a3dafb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1580,12 +1580,7 @@ void tcp_destroy_sock(struct sock *sk) xfrm_sk_free_policy(sk); -#ifdef INET_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) { - printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", - sk, atomic_read(&sk->sk_refcnt)); - } -#endif + sk_refcnt_debug_release(sk); atomic_dec(&tcp_orphan_count); sock_put(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f42a284164b..f8e288c8d69 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -84,7 +84,7 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) tcp_bucket_destroy(tb); spin_unlock(&bhead->lock); -#ifdef INET_REFCNT_DEBUG +#ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, atomic_read(&tw->tw_refcnt)); @@ -799,9 +799,21 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newsk->sk_err = 0; newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy), same rationale as the first comment in this + * function. + * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + atomic_inc(&tcp_sockets_allocated); if (sock_flag(newsk, SOCK_KEEPOPEN)) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 57404735362..7df2ccb380d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -86,26 +86,12 @@ extern void if6_proc_exit(void); int sysctl_ipv6_bindv6only; -#ifdef INET_REFCNT_DEBUG -atomic_t inet6_sock_nr; -EXPORT_SYMBOL(inet6_sock_nr); -#endif - /* The inetsw table contains everything that inet_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); -static void inet6_sock_destruct(struct sock *sk) -{ - inet_sock_destruct(sk); - -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet6_sock_nr); -#endif -} - static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -186,7 +172,7 @@ static int inet6_create(struct socket *sock, int protocol) inet->hdrincl = 1; } - sk->sk_destruct = inet6_sock_destruct; + sk->sk_destruct = inet_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; @@ -213,12 +199,17 @@ static int inet6_create(struct socket *sock, int protocol) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; + /* + * Increment only the relevant sk_prot->socks debug field, this changes + * the previous behaviour of incrementing both the equivalent to + * answer->prot->socks (inet6_sock_nr) and inet_sock_nr. + * + * This allows better debug granularity as we'll know exactly how many + * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6 + * transport protocol socks. -acme + */ + sk_refcnt_debug_inc(sk); - -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); - atomic_inc(&inet_sock_nr); -#endif if (inet->num) { /* It assumes that any protocol which allows * the user to assign a number at socket diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bc144a79fa..76fe23925d7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -163,6 +163,13 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, fl6_free_socklist(sk); ipv6_sock_mc_close(sk); + /* + * Sock is moving from IPv6 to IPv4 (sk_prot), so + * remove it from the refcnt debug socks count in the + * original family... + */ + sk_refcnt_debug_dec(sk); + if (sk->sk_protocol == IPPROTO_TCP) { struct tcp_sock *tp = tcp_sk(sk); @@ -192,9 +199,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, kfree_skb(pktopt); sk->sk_destruct = inet_sock_destruct; -#ifdef INET_REFCNT_DEBUG - atomic_dec(&inet6_sock_nr); -#endif + /* + * ... and add it to the refcnt debug socks count + * in the new family. -acme + */ + sk_refcnt_debug_inc(sk); module_put(THIS_MODULE); retv = 0; break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ef29cfd936d..885e05bd99f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1407,12 +1407,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; - /* Charge newly allocated IPv6 socket. Though it is mapped, - * it is IPv6 yet. + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks count + * here, tcp_create_openreq_child now does this for us, see the comment in + * that function for the gory details. -acme */ -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); -#endif /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 af_tcp.af_specific. @@ -1467,10 +1466,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newsk == NULL) goto out; - /* Charge newly allocated IPv6 socket */ -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); -#endif + /* + * No need to charge this sock to the relevant IPv6 refcnt debug socks + * count here, tcp_create_openreq_child now does this for us, see the + * comment in that function for the gory details. -acme + */ ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e9b2fd480d6..4a6421a9fca 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -641,10 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, else newinet->pmtudisc = IP_PMTUDISC_WANT; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet6_sock_nr); - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ce9245e71fc..8d3f8096b87 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -593,9 +593,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet->mc_index = 0; newinet->mc_list = NULL; -#ifdef INET_REFCNT_DEBUG - atomic_inc(&inet_sock_nr); -#endif + sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); -- cgit v1.2.3-70-g09d2 From 32519f11d38ea8f4f60896763bacec7db1760f9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:50:02 -0700 Subject: [INET]: Introduce inet_sk_rebuild_header From tcp_v4_rebuild_header, that already was pretty generic, I only needed to use sk->sk_protocol instead of the hardcoded IPPROTO_TCP and establish the requirement that INET transport layer protocols that want to use this function map TCP_SYN_SENT to its equivalent state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 + include/net/tcp.h | 2 - net/ipv4/af_inet.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 5 --- net/ipv4/tcp_ipv4.c | 98 +------------------------------------------- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 117 insertions(+), 105 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/ip.h b/include/linux/ip.h index 31e7cedd9f8..33e8a19a1a0 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #endif #endif +extern int inet_sk_rebuild_header(struct sock *sk); + struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/net/tcp.h b/include/net/tcp.h index d95661a3aee..0c769adb046 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -799,8 +799,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_rebuild_header(struct sock *sk); - extern int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9e83d7773d8..7137e6420d6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -951,6 +951,119 @@ void inet_unregister_protosw(struct inet_protosw *p) } } +/* + * Shall we try to damage output packets if routing dev changes? + */ + +int sysctl_ip_dynaddr; + +static int inet_sk_reselect_saddr(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + struct rtable *rt; + __u32 old_saddr = inet->saddr; + __u32 new_saddr; + __u32 daddr = inet->daddr; + + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; + + /* Query new route. */ + err = ip_route_connect(&rt, daddr, 0, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if, + sk->sk_protocol, + inet->sport, inet->dport, sk); + if (err) + return err; + + sk_setup_caps(sk, &rt->u.dst); + + new_saddr = rt->rt_src; + + if (new_saddr == old_saddr) + return 0; + + if (sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "%s(): shifting inet->" + "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", + __FUNCTION__, + NIPQUAD(old_saddr), + NIPQUAD(new_saddr)); + } + + inet->saddr = inet->rcv_saddr = new_saddr; + + /* + * XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. -DaveM + * + * Besides that, it does not check for connection + * uniqueness. Wait for troubles. + */ + __sk_prot_rehash(sk); + return 0; +} + +int inet_sk_rebuild_header(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); + u32 daddr; + int err; + + /* Route is OK, nothing to do. */ + if (rt) + return 0; + + /* Reroute. */ + daddr = inet->daddr; + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; +{ + struct flowi fl = { + .oif = sk->sk_bound_dev_if, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = inet->saddr, + .tos = RT_CONN_FLAGS(sk), + }, + }, + .proto = sk->sk_protocol, + .uli_u = { + .ports = { + .sport = inet->sport, + .dport = inet->dport, + }, + }, + }; + + err = ip_route_output_flow(&rt, &fl, sk, 0); +} + if (!err) + sk_setup_caps(sk, &rt->u.dst); + else { + /* Routing failed... */ + sk->sk_route_caps = 0; + /* + * Other protocols have to map its equivalent state to TCP_SYN_SENT. + * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme + */ + if (!sysctl_ip_dynaddr || + sk->sk_state != TCP_SYN_SENT || + (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || + (err = inet_sk_reselect_saddr(sk)) != 0) + sk->sk_err_soft = -err; + } + + return err; +} + +EXPORT_SYMBOL(inet_sk_rebuild_header); + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c72fc878f06..dd568b0b706 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,11 +83,6 @@ #include #include -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr; int sysctl_ip_default_ttl = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a5daecbd2a..ae6fad99a9a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1832,101 +1832,6 @@ do_time_wait: goto discard_it; } -static int tcp_v4_reselect_saddr(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; - __u32 old_saddr = inet->saddr; - __u32 new_saddr; - __u32 daddr = inet->daddr; - - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - /* Query new route. */ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - IPPROTO_TCP, - inet->sport, inet->dport, sk); - if (err) - return err; - - sk_setup_caps(sk, &rt->u.dst); - - new_saddr = rt->rt_src; - - if (new_saddr == old_saddr) - return 0; - - if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); - } - - inet->saddr = new_saddr; - inet->rcv_saddr = new_saddr; - - /* XXX The only one ugly spot where we need to - * XXX really change the sockets identity after - * XXX it has entered the hashes. -DaveM - * - * Besides that, it does not check for connection - * uniqueness. Wait for troubles. - */ - __sk_prot_rehash(sk); - return 0; -} - -int tcp_v4_rebuild_header(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; - int err; - - /* Route is OK, nothing to do. */ - if (rt) - return 0; - - /* Reroute. */ - daddr = inet->daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->saddr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = inet->dport } } }; - - err = ip_route_output_flow(&rt, &fl, sk, 0); - } - if (!err) { - sk_setup_caps(sk, &rt->u.dst); - return 0; - } - - /* Routing failed... */ - sk->sk_route_caps = 0; - - if (!sysctl_ip_dynaddr || - sk->sk_state != TCP_SYN_SENT || - (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || - (err = tcp_v4_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; - - return err; -} - static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; @@ -1998,7 +1903,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) struct tcp_func ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, @@ -2630,7 +2535,6 @@ EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_rebuild_header); EXPORT_SYMBOL(tcp_v4_remember_stamp); EXPORT_SYMBOL(tcp_v4_send_check); EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 885e05bd99f..4e32a8496be 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1983,7 +1983,7 @@ static struct tcp_func ipv6_specific = { static struct tcp_func ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, -- cgit v1.2.3-70-g09d2 From 608c8e4f7b6e61cc783283e9dff8a465a5ad59bb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:27 -0700 Subject: [NETFILTER]: Extend netfilter logging API This patch is in preparation to nfnetlink_log: - loggers now have to register struct nf_logger instead of nf_logfn - nf_log_unregister() replaced by nf_log_unregister_pf() and nf_log_unregister_logger() - add comment to ip[6]t_LOG.h to assure nobody redefines flags - add /proc/net/netfilter/nf_log to tell user which logger is currently registered for which address family - if user has configured logging, but no logging backend (logger) is available, always spit a message to syslog, not just the first time. - split ip[6]t_LOG.c into two parts: Backend: Always try to register as logger for the respective address family Frontend: Always log via nf_log_packet() API - modify all users of nf_log_packet() to accomodate additional argument Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 48 +++++++++- include/linux/netfilter_ipv4/ipt_LOG.h | 1 + include/linux/netfilter_ipv6/ip6t_LOG.h | 1 + net/core/netfilter.c | 127 +++++++++++++++++++++++---- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 21 ++--- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 6 +- net/ipv4/netfilter/ipt_LOG.c | 86 ++++++++++-------- net/ipv4/netfilter/ipt_ULOG.c | 33 +++++-- net/ipv6/netfilter/ip6t_LOG.c | 93 +++++++++++--------- 10 files changed, 299 insertions(+), 125 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 711e05f33d6..815583af06c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -typedef void nf_logfn(unsigned int hooknum, +/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will + * disappear once iptables is replaced with pkttables. Please DO NOT use them + * for any new code! */ +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_MASK 0x0f + +#define NF_LOG_TYPE_LOG 0x01 +#define NF_LOG_TYPE_ULOG 0x02 + +struct nf_loginfo { + u_int8_t type; + union { + struct { + u_int32_t copy_len; + u_int16_t group; + u_int16_t qthreshold; + } ulog; + struct { + u_int8_t level; + u_int8_t logflags; + } log; + } u; +}; + +typedef void nf_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix); +struct nf_logger { + struct module *me; + nf_logfn *logfn; + char *name; +}; + /* Function to register/unregister log function. */ -int nf_log_register(int pf, nf_logfn *logfn); -void nf_log_unregister(int pf, nf_logfn *logfn); +int nf_log_register(int pf, struct nf_logger *logger); +void nf_log_unregister_pf(int pf); +void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ void nf_log_packet(int pf, @@ -130,6 +166,7 @@ void nf_log_packet(int pf, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *li, const char *fmt, ...); /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,6 +258,11 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#ifdef CONFIG_PROC_FS +#include +extern struct proc_dir_entry *proc_net_netfilter; +#endif + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index d25f782e57d..22d16177319 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,7 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 42996a43bb3..9008ff5c40a 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,7 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 3e38084ac2b..98cc61e79fe 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* In this code, we can be waiting indefinitely for userspace to @@ -535,11 +536,10 @@ EXPORT_SYMBOL(skb_make_writable); #define NF_LOG_PREFIXLEN 128 -static nf_logfn *nf_logging[NPROTO]; /* = NULL */ -static int reported = 0; +static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ static DEFINE_SPINLOCK(nf_log_lock); -int nf_log_register(int pf, nf_logfn *logfn) +int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; @@ -547,54 +547,134 @@ int nf_log_register(int pf, nf_logfn *logfn) * substituting pointer. */ spin_lock(&nf_log_lock); if (!nf_logging[pf]) { - rcu_assign_pointer(nf_logging[pf], logfn); + rcu_assign_pointer(nf_logging[pf], logger); ret = 0; } spin_unlock(&nf_log_lock); return ret; } -void nf_log_unregister(int pf, nf_logfn *logfn) +void nf_log_unregister_pf(int pf) { spin_lock(&nf_log_lock); - if (nf_logging[pf] == logfn) - nf_logging[pf] = NULL; + nf_logging[pf] = NULL; spin_unlock(&nf_log_lock); /* Give time to concurrent readers. */ synchronize_net(); -} +} + +void nf_log_unregister_logger(struct nf_logger *logger) +{ + int i; + + spin_lock(&nf_log_lock); + for (i = 0; i < NPROTO; i++) { + if (nf_logging[i] == logger) + nf_logging[i] = NULL; + } + spin_unlock(&nf_log_lock); + + synchronize_net(); +} void nf_log_packet(int pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *loginfo, const char *fmt, ...) { va_list args; char prefix[NF_LOG_PREFIXLEN]; - nf_logfn *logfn; + struct nf_logger *logger; rcu_read_lock(); - logfn = rcu_dereference(nf_logging[pf]); - if (logfn) { + logger = rcu_dereference(nf_logging[pf]); + if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); /* We must read logging before nf_logfn[pf] */ - logfn(hooknum, skb, in, out, prefix); - } else if (!reported) { - printk(KERN_WARNING "nf_log_packet: can\'t log yet, " - "no backend logging module loaded in!\n"); - reported++; + logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); + } else if (net_ratelimit()) { + printk(KERN_WARNING "nf_log_packet: can\'t log since " + "no backend logging module loaded in! Please either " + "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } EXPORT_SYMBOL(nf_log_register); -EXPORT_SYMBOL(nf_log_unregister); +EXPORT_SYMBOL(nf_log_unregister_pf); +EXPORT_SYMBOL(nf_log_unregister_logger); EXPORT_SYMBOL(nf_log_packet); +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_netfilter; +EXPORT_SYMBOL(proc_net_netfilter); + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + rcu_read_lock(); + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + rcu_read_unlock(); +} + +static int seq_show(struct seq_file *s, void *v) +{ + loff_t *pos = v; + const struct nf_logger *logger; + + logger = rcu_dereference(nf_logging[*pos]); + + if (!logger) + return seq_printf(s, "%2lld NONE\n", *pos); + + return seq_printf(s, "%2lld %s\n", *pos, logger->name); +} + +static struct seq_operations nflog_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nflog_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nflog_seq_ops); +} + +static struct file_operations nflog_file_ops = { + .owner = THIS_MODULE, + .open = nflog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* PROC_FS */ + + /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. */ @@ -613,6 +693,9 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) void __init netfilter_init(void) { int i, h; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); @@ -624,6 +707,16 @@ void __init netfilter_init(void) for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } + +#ifdef CONFIG_PROC_FS + proc_net_netfilter = proc_mkdir("netfilter", proc_net); + if (!proc_net_netfilter) + panic("cannot create netfilter proc entry"); + pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + if (!pde) + panic("cannot create /proc/net/netfilter/nf_log"); + pde->proc_fops = &nflog_file_ops; +#endif } EXPORT_SYMBOL(ip_ct_attach); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 3f90cb9979a..838d1d69b36 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -231,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, if (!(u16)csum_fold(skb->csum)) break; if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad ICMP checksum "); return -NF_ACCEPT; } @@ -254,7 +254,7 @@ checksum_skipped: */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index c2bce22d403..f23ef1f88c4 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -716,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, res = 1; } else { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -815,7 +815,7 @@ static int tcp_error(struct sk_buff *skb, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -823,7 +823,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -840,7 +840,7 @@ static int tcp_error(struct sk_buff *skb, skb->ip_summed == CHECKSUM_HW ? skb->csum : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -849,7 +849,7 @@ static int tcp_error(struct sk_buff *skb, tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -897,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, */ write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: killing out of sync session "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: " + "killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -912,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: @@ -922,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: @@ -943,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid SYN"); + NULL, "ip_ct_tcp: invalid SYN"); return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 14130169cbf..f2dcac7c766 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -98,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: short packet "); return -NF_ACCEPT; } @@ -106,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -126,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, skb->ip_summed == CHECKSUM_HW ? skb->csum : skb_checksum(skb, iph->ihl*4, udplen, 0))) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ef08733d26d..92ed050fac6 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -27,10 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); -static unsigned int nflog = 1; -module_param(nflog, int, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - #if 0 #define DEBUGP printk #else @@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ipt_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { struct iphdr _iph, *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { @@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info, if (ntohs(ih->frag_off) & IP_OFFSET) printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((info->logflags & IPT_LOG_IPOPT) + if ((logflags & IPT_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; unsigned int i, optsize; @@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IPT_LOG_TCPSEQ) + if (logflags & IPT_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 13 "WINDOW=65535 " */ @@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IPT_LOG_TCPOPT) + if ((logflags & IPT_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; unsigned char *op; @@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) { + if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ipt_log_packet(unsigned int hooknum, +ipt_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ipt_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER @@ -405,28 +419,15 @@ ipt_log_target(struct sk_buff **pskb, void *userinfo) { const struct ipt_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; - level_string[1] = '0' + (loginfo->level % 8); - ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; - return IPT_CONTINUE; -} + nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); -static void -ipt_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ipt_log_info loginfo = { - .level = 0, - .logflags = IPT_LOG_MASK, - .prefix = "" - }; - - ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); + return IPT_CONTINUE; } static int ipt_log_checkentry(const char *tablename, @@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_log_logger ={ + .name = "ipt_LOG", + .logfn = &ipt_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ipt_register_target(&ipt_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { + printk(KERN_WARNING "ipt_LOG: not logging via system console " + "since somebody else already registered for PF_INET\n"); + /* we cannot make module load fail here, since otherwise + * iptables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_log_logger); ipt_unregister_target(&ipt_log_reg); } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 4ea8371ab27..b86f06ec976 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -304,18 +304,27 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, return IPT_CONTINUE; } -static void ipt_logfn(unsigned int hooknum, +static void ipt_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix) { - struct ipt_ulog_info loginfo = { - .nl_group = ULOG_DEFAULT_NLGROUP, - .copy_range = 0, - .qthreshold = ULOG_DEFAULT_QTHRESHOLD, - .prefix = "" - }; + struct ipt_ulog_info loginfo; + + if (!li || li->type != NF_LOG_TYPE_ULOG) { + loginfo.nl_group = ULOG_DEFAULT_NLGROUP; + loginfo.copy_range = 0; + loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; + loginfo.prefix[0] = '\0'; + } else { + loginfo.nl_group = li->u.ulog.group; + loginfo.copy_range = li->u.ulog.copy_len; + loginfo.qthreshold = li->u.ulog.qthreshold; + strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); + } ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } @@ -355,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_ulog_logger = { + .name = "ipt_ULOG", + .logfn = &ipt_logfn, + .me = THIS_MODULE, +}; + static int __init init(void) { int i; @@ -382,7 +397,7 @@ static int __init init(void) return -EINVAL; } if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + nf_log_register(PF_INET, &ipt_ulog_logger); return 0; } @@ -395,7 +410,7 @@ static void __exit fini(void) DEBUGP("ipt_ULOG: cleanup_module\n"); if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_ulog_logger); ipt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a692e26a4fa..0cd1d1bd903 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); -static unsigned int nflog = 1; -module_param(nflog, int, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - struct in_device; #include #include @@ -44,7 +40,7 @@ struct in_device; static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ip6t_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) { @@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info, struct ipv6hdr _ip6h, *ih; unsigned int ptr; unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { @@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 48 "OPT (...) " */ - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk("OPT ( "); switch (currenthdr) { @@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(")"); return; } @@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info, break; /* Max Length */ case IPPROTO_AH: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_auth_hdr _ahdr, *ah; /* Max length: 3 "AH " */ @@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_esp_hdr _esph, *eh; /* Max length: 4 "ESP " */ @@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("Unknown Ext Hdr %u", currenthdr); return; } - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(") "); currenthdr = hp->nexthdr; @@ -218,7 +220,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IP6T_LOG_TCPSEQ) + if (logflags & IP6T_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 13 "WINDOW=65535 " */ @@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IP6T_LOG_TCPOPT) + if ((logflags & IP6T_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; unsigned int i; @@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) { + if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -357,19 +359,31 @@ static void dump_packet(const struct ip6t_log_info *info, } } +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ip6t_log_packet(unsigned int hooknum, +ip6t_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ip6t_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? out->name : ""); if (in && !out) { @@ -416,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb, void *userinfo) { const struct ip6t_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; - level_string[1] = '0' + (loginfo->level % 8); - ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); return IP6T_CONTINUE; } -static void -ip6t_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ip6t_log_info loginfo = { - .level = 0, - .logflags = IP6T_LOG_MASK, - .prefix = "" - }; - - ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); -} static int ip6t_log_checkentry(const char *tablename, const struct ip6t_entry *e, @@ -475,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ip6t_logger = { + .name = "ip6t_LOG", + .logfn = &ip6t_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ip6t_register_target(&ip6t_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET6, &ip6t_logfn); + if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { + printk(KERN_WARNING "ip6t_LOG: not logging via system console " + "since somebody else already registered for PF_INET6\n"); + /* we cannot make module load fail here, since otherwise + * ip6tables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET6, &ip6t_logfn); + nf_log_unregister_logger(&ip6t_logger); ip6t_unregister_target(&ip6t_log_reg); } -- cgit v1.2.3-70-g09d2 From 0f7ff9274e72fd254fbd1ab117bbc1db6e7cdb34 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:44 -0700 Subject: [INET]: Just rename the TCP hashtable functions/structs to inet_ This is to break down the complexity of the series of patches, making it very clear that this one just does: 1. renames tcp_ prefixed hashtable functions and data structures that were already mostly generic to inet_ to share it with DCCP and other INET transport protocols. 2. Removes not used functions (__tb_head & tb_head) 3. Removes some leftover prototypes in the headers (tcp_bucket_unlock & tcp_v4_build_header) Next changesets will move tcp_sk(sk)->bind_hash to inet_sock so that we can make functions such as tcp_inherit_port, __tcp_inherit_port, tcp_v4_get_port, __tcp_put_port, generic and get others like tcp_destroy_sock closer to generic (tcp_orphan_count will go to sk->sk_prot to allow this). Eventually most of these functions will be used passing the transport protocol inet_hashinfo structure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 93 +++++++++++++++++++---------------------- net/ipv4/tcp.c | 15 ++++--- net/ipv4/tcp_diag.c | 4 +- net/ipv4/tcp_ipv4.c | 106 +++++++++++++++++++++++++---------------------- net/ipv4/tcp_minisocks.c | 16 +++---- net/ipv6/tcp_ipv6.c | 42 +++++++++---------- 7 files changed, 139 insertions(+), 139 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e4fd82e4210..ec580a560e8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct tcp_bind_bucket *bind_hash; + struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c769adb046..6c9f6f7cab5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,13 +44,13 @@ * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */ -struct tcp_ehash_bucket { +struct inet_ehash_bucket { rwlock_t lock; struct hlist_head chain; } __attribute__((__aligned__(8))); /* This is for listening sockets, thus all sockets which possess wildcards. */ -#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ /* There are a few simple rules, which allow for local port reuse by * an application. In essence: @@ -83,31 +83,22 @@ struct tcp_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */ -struct tcp_bind_bucket { +struct inet_bind_bucket { unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; -#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) -struct tcp_bind_hashbucket { +struct inet_bind_hashbucket { spinlock_t lock; struct hlist_head chain; }; -static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); -} - -static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_empty(&head->chain) ? NULL : __tb_head(head); -} - -extern struct tcp_hashinfo { +struct inet_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * @@ -116,21 +107,21 @@ extern struct tcp_hashinfo { * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ - struct tcp_ehash_bucket *__tcp_ehash; + struct inet_ehash_bucket *ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ - struct tcp_bind_hashbucket *__tcp_bhash; + struct inet_bind_hashbucket *bhash; - int __tcp_bhash_size; - int __tcp_ehash_size; + int bhash_size; + int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ - struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -138,36 +129,39 @@ extern struct tcp_hashinfo { * Now align to a new cache line as all the following members * are often dirty. */ - rwlock_t __tcp_lhash_lock ____cacheline_aligned; - atomic_t __tcp_lhash_users; - wait_queue_head_t __tcp_lhash_wait; - spinlock_t __tcp_portalloc_lock; -} tcp_hashinfo; - -#define tcp_ehash (tcp_hashinfo.__tcp_ehash) -#define tcp_bhash (tcp_hashinfo.__tcp_bhash) -#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) -#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) -#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; + +extern struct inet_hashinfo tcp_hashinfo; +#define tcp_ehash (tcp_hashinfo.ehash) +#define tcp_bhash (tcp_hashinfo.bhash) +#define tcp_ehash_size (tcp_hashinfo.ehash_size) +#define tcp_bhash_size (tcp_hashinfo.bhash_size) +#define tcp_listening_hash (tcp_hashinfo.listening_hash) +#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) +#define tcp_lhash_users (tcp_hashinfo.lhash_users) +#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) +#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum); -extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); -extern void tcp_bucket_unlock(struct sock *sk); +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); extern int tcp_port_rover; /* These are AF independent. */ -static __inline__ int tcp_bhashfn(__u16 lport) +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) { - return (lport & (tcp_bhash_size - 1)); + return lport & (bhash_size - 1); } -extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, +extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); #if (BITS_PER_LONG == 64) @@ -212,7 +206,7 @@ struct tcp_tw_bucket { __u32 tw_ts_recent; long tw_ts_recent_stamp; unsigned long tw_ttd; - struct tcp_bind_bucket *tw_tb; + struct inet_bind_bucket *tw_tb; struct hlist_node tw_death_node; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; @@ -366,14 +360,14 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) /* These can have wildcards, don't try too hard. */ -static __inline__ int tcp_lhashfn(unsigned short num) +static inline int inet_lhashfn(const unsigned short num) { - return num & (TCP_LHTABLE_SIZE - 1); + return num & (INET_LHTABLE_SIZE - 1); } -static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) +static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return tcp_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(inet_sk(sk)->num); } #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -799,9 +793,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_build_header(struct sock *sk, - struct sk_buff *skb); - extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 20159a3dafb..1ec03db7dcd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -272,6 +272,9 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); kmem_cache_t *tcp_bucket_cachep; + +EXPORT_SYMBOL_GPL(tcp_bucket_cachep); + kmem_cache_t *tcp_timewait_cachep; atomic_t tcp_orphan_count = ATOMIC_INIT(0); @@ -2259,7 +2262,7 @@ void __init tcp_init(void) sizeof(skb->cb)); tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct tcp_bind_bucket), + sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_bucket_cachep) @@ -2277,9 +2280,9 @@ void __init tcp_init(void) * * The methodology is similar to that of the buffer cache. */ - tcp_ehash = (struct tcp_ehash_bucket *) + tcp_ehash = alloc_large_system_hash("TCP established", - sizeof(struct tcp_ehash_bucket), + sizeof(struct inet_ehash_bucket), thash_entries, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : @@ -2294,9 +2297,9 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_ehash[i].chain); } - tcp_bhash = (struct tcp_bind_hashbucket *) + tcp_bhash = alloc_large_system_hash("TCP bind", - sizeof(struct tcp_bind_hashbucket), + sizeof(struct inet_bind_hashbucket), tcp_ehash_size, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : @@ -2315,7 +2318,7 @@ void __init tcp_init(void) * on available memory. */ for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); + (tcp_bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4) { diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f79bd11a470..5bb6a0f1c77 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -590,7 +590,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; tcp_listen_lock(); - for (i = s_i; i < TCP_LHTABLE_SIZE; i++) { + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; @@ -646,7 +646,7 @@ skip_listen_ht: return skb->len; for (i = s_i; i < tcp_ehash_size; i++) { - struct tcp_ehash_bucket *head = &tcp_ehash[i]; + struct inet_ehash_bucket *head = &tcp_ehash[i]; struct sock *sk; struct hlist_node *node; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c03d7e9688c..4138630556e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -89,12 +89,11 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { - .__tcp_lhash_lock = RW_LOCK_UNLOCKED, - .__tcp_lhash_users = ATOMIC_INIT(0), - .__tcp_lhash_wait - = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), - .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED +struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, }; /* @@ -105,14 +104,14 @@ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Allocate and initialize a new TCP local port bind bucket. +/* Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ -struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum) +struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum) { - struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, - SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); if (tb) { tb->port = snum; tb->fastreuse = 0; @@ -123,20 +122,21 @@ struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, } /* Caller must hold hashbucket lock for this tb with local BH disabled */ -void tcp_bucket_destroy(struct tcp_bind_bucket *tb) +void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - kmem_cache_free(tcp_bucket_cachep, tb); + kmem_cache_free(cachep, tb); } } /* Caller must disable local BH processing. */ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { - struct tcp_bind_hashbucket *head = - &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = + &tcp_bhash[inet_bhashfn(inet_sk(child)->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; @@ -152,15 +152,15 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) local_bh_enable(); } -void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, - unsigned short snum) +void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum) { inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); tcp_sk(sk)->bind_hash = tb; } -static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) +static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; @@ -190,9 +190,9 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) */ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; + struct inet_bind_hashbucket *head; struct hlist_node *node; - struct tcp_bind_bucket *tb; + struct inet_bind_bucket *tb; int ret; local_bh_disable(); @@ -211,9 +211,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -238,9 +238,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -261,7 +261,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -290,15 +290,16 @@ fail: static void __tcp_put_port(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = &tcp_bhash[inet_bhashfn(inet->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; __sk_del_bind_node(sk); tcp_sk(sk)->bind_hash = NULL; inet->num = 0; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -344,7 +345,7 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -381,7 +382,7 @@ void tcp_unhash(struct sock *sk) tcp_listen_wlock(); lock = &tcp_lhash_lock; } else { - struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; lock = &head->lock; write_lock_bh(&head->lock); } @@ -401,8 +402,10 @@ void tcp_unhash(struct sock *sk) * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. */ -static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, - unsigned short hnum, int dif) +static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; struct hlist_node *node; @@ -438,14 +441,15 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, } /* Optimize the common listener case. */ -static inline struct sock *tcp_v4_lookup_listener(u32 daddr, - unsigned short hnum, int dif) +static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *sk = NULL; struct hlist_head *head; read_lock(&tcp_lhash_lock); - head = &tcp_listening_hash[tcp_lhashfn(hnum)]; + head = &tcp_listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -470,11 +474,13 @@ sherry_cache: * Local BH must be disabled here. */ -static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, - u32 daddr, u16 hnum, - int dif) +static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, + const u16 sport, + const u32 daddr, + const u16 hnum, + const int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(sport, hnum); struct sock *sk; @@ -546,7 +552,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -639,9 +645,9 @@ static inline u32 connect_port_offset(const struct sock *sk) */ static inline int tcp_v4_hash_connect(struct sock *sk) { - unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -658,14 +664,14 @@ static inline int tcp_v4_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. */ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -678,7 +684,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -713,7 +719,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -2055,7 +2061,7 @@ start_req: } read_unlock_bh(&tp->accept_queue.syn_wait_lock); } - if (++st->bucket < TCP_LHTABLE_SIZE) { + if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_listening_hash[st->bucket]); goto get_sk; } @@ -2506,7 +2512,7 @@ void __init tcp_v4_init(struct net_proto_family *ops) EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(tcp_bind_hash); -EXPORT_SYMBOL(tcp_bucket_create); +EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_listen_wlock); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7c46a553c4a..1df6cd46066 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,9 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead; - struct tcp_bind_hashbucket *bhead; - struct tcp_bind_bucket *tb; + struct inet_ehash_bucket *ehead; + struct inet_bind_hashbucket *bhead; + struct inet_bind_bucket *tb; /* Unlink from established hashes. */ ehead = &tcp_ehash[tw->tw_hashent]; @@ -76,12 +76,12 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; + bhead = &tcp_bhash[inet_bhashfn(tw->tw_num, tcp_bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG @@ -296,14 +296,14 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; - struct tcp_bind_hashbucket *bhead; + struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; + struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; + bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = tcp_sk(sk)->bind_hash; BUG_TRAP(tcp_sk(sk)->bind_hash); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4e32a8496be..31f50fb29ff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -98,11 +98,11 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) return tcp_v6_hashfn(laddr, lport, faddr, fport); } -static inline int tcp_v6_bind_conflict(struct sock *sk, - struct tcp_bind_bucket *tb) +static inline int tcp_v6_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { - struct sock *sk2; - struct hlist_node *node; + const struct sock *sk2; + const struct hlist_node *node; /* We must walk the whole port owner list in this case. -DaveM */ sk_for_each_bound(sk2, node, &tb->owners) { @@ -126,8 +126,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk, */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; struct hlist_node *node; int ret; @@ -146,9 +146,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -171,9 +171,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) /* OK, here is the one we will use. */ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -192,7 +192,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -224,7 +224,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -264,7 +264,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor hiscore=0; read_lock(&tcp_lhash_lock); - sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { + sk_for_each(sk, node, &tcp_listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -305,7 +305,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; struct sock *sk; struct hlist_node *node; __u32 ports = TCP_COMBINED_PORTS(sport, hnum); @@ -461,7 +461,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -540,8 +540,8 @@ static inline u32 tcpv6_port_offset(const struct sock *sk) static int tcp_v6_hash_connect(struct sock *sk) { unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -558,14 +558,14 @@ static int tcp_v6_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. */ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -578,7 +578,7 @@ static int tcp_v6_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -613,7 +613,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); -- cgit v1.2.3-70-g09d2 From a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:01:14 -0700 Subject: [INET]: Move bind_hash from tcp_sk to inet_sk This should really be in a inet_connection_sock, but I'm leaving it for a later optimization, when some more fields common to INET transport protocols now in tcp_sk or inet_sk will be chunked out into inet_connection_sock, for now its better to concentrate on getting the changes in the core merged to leave the DCCP tree with only DCCP specific code. Next changesets will take advantage of this move to generalise things like tcp_bind_hash, tcp_put_port, tcp_inherit_port, making the later receive a inet_hashinfo parameter, and even __tcp_tw_hashdance, etc in the future, when tcp_tw_bucket gets transformed into the struct timewait_sock hierarchy. tcp_destroy_sock also is eligible as soon as tcp_orphan_count gets moved to sk_prot. A cascade of incremental changes will ultimately make the tcp_lookup functions be fully generic. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 ++ include/linux/tcp.h | 1 - include/net/tcp.h | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 21 +++++++++++---------- net/ipv4/tcp_minisocks.c | 15 ++++++++------- net/ipv6/tcp_ipv6.c | 8 ++++---- 7 files changed, 28 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0..2c54bbd3da7 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,6 +128,7 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -157,6 +158,7 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ + struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec580a560e8..e70ab19652d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ff5d30ac2b0..6c6c879e7e8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1266,7 +1266,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (tcp_sk(sk)->bind_hash && + if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) tcp_put_port(sk); /* fall through */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ec03db7dcd..e54a410ca70 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1575,7 +1575,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); sk->sk_prot->destroy(sk); @@ -1802,7 +1802,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || tp->bind_hash); + BUG_TRAP(!inet->num || inet->bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 58e36ed88f2..10a9b3ae344 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -113,9 +113,9 @@ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; sk_add_bind_node(child, &tb->owners); - tcp_sk(child)->bind_hash = tb; + inet_sk(child)->bind_hash = tb; spin_unlock(&head->lock); } @@ -129,9 +129,10 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - inet_sk(sk)->num = snum; + struct inet_sock *inet = inet_sk(sk); + inet->num = snum; sk_add_bind_node(sk, &tb->owners); - tcp_sk(sk)->bind_hash = tb; + inet->bind_hash = tb; } static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) @@ -246,9 +247,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -269,9 +270,9 @@ static void __tcp_put_port(struct sock *sk) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = inet->bind_hash; __sk_del_bind_node(sk); - tcp_sk(sk)->bind_hash = NULL; + inet->bind_hash = NULL; inet->num = 0; inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); @@ -694,7 +695,7 @@ ok: } head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __tcp_v4_hash(sk, 0); @@ -1940,7 +1941,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. */ - if (tp->bind_hash) + if (inet_sk(sk)->bind_hash) tcp_put_port(sk); /* diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1df6cd46066..267cea1087e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -296,17 +296,17 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { + const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; + bhead = &tcp_bhash[inet_bhashfn(inet->num, tcp_bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = tcp_sk(sk)->bind_hash; - BUG_TRAP(tcp_sk(sk)->bind_hash); + tw->tw_tb = inet->bind_hash; + BUG_TRAP(inet->bind_hash); tw_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -694,6 +694,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if(newsk != NULL) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); + struct inet_sock *newinet = inet_sk(newsk); struct tcp_sock *newtp; struct sk_filter *filter; @@ -702,10 +703,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, /* SANITY */ sk_node_init(&newsk->sk_node); - tcp_sk(newsk)->bind_hash = NULL; + newinet->bind_hash = NULL; /* Clone the TCP header template */ - inet_sk(newsk)->dport = ireq->rmt_port; + newinet->dport = ireq->rmt_port; sock_lock_init(newsk); bh_lock_sock(newsk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 31f50fb29ff..a8ca7ba06c1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -204,9 +204,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -613,8 +613,8 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { -- cgit v1.2.3-70-g09d2 From 2d8c4ce51903636ce0f60addc8134aa50ab8fa76 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:13 -0700 Subject: [INET]: Generalise tcp_bind_hash & tcp_inherit_port This required moving tcp_bucket_cachep to inet_hashinfo. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 32 ++++++++++++++++++++ include/net/tcp.h | 11 ++----- net/ipv4/inet_hashtables.c | 40 +++++++++++++++++++++++++ net/ipv4/tcp.c | 4 --- net/ipv4/tcp_ipv4.c | 68 +++---------------------------------------- net/ipv6/tcp_ipv6.c | 6 ++-- 6 files changed, 81 insertions(+), 80 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3a6c11ca421..da9705525f1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,12 +14,15 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include #include #include #include #include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -113,6 +116,7 @@ struct inet_hashinfo { atomic_t lhash_users; wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; + kmem_cache_t *bind_bucket_cachep; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, @@ -148,6 +152,9 @@ static inline int inet_bhashfn(const __u16 lport, const int bhash_size) return lport & (bhash_size - 1); } +extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + /* These can have wildcards, don't try too hard. */ static inline int inet_lhashfn(const unsigned short num) { @@ -159,4 +166,29 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) return inet_lhashfn(inet_sk(sk)->num); } +/* Caller must disable local BH processing. */ +static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet_sk(sk)->bind_hash; + sk_add_bind_node(child, &tb->owners); + inet_sk(child)->bind_hash = tb; + spin_unlock(&head->lock); +} + +static inline void inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + local_bh_disable(); + __inet_inherit_port(table, sk, child); + local_bh_enable(); +} + +extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); + #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c6c879e7e8..9eb8ff7c911 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -51,14 +51,10 @@ extern struct inet_hashinfo tcp_hashinfo; #define tcp_lhash_users (tcp_hashinfo.lhash_users) #define tcp_lhash_wait (tcp_hashinfo.lhash_wait) #define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) - -extern kmem_cache_t *tcp_bucket_cachep; +#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) extern int tcp_port_rover; -extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - unsigned short snum); - #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 #else @@ -549,9 +545,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) -extern void tcp_put_port(struct sock *sk); -extern void tcp_inherit_port(struct sock *sk, struct sock *child); - extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -1268,7 +1261,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) sk->sk_prot->unhash(sk); if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 343a890bd61..33d6cbe32cd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -14,6 +14,7 @@ */ #include +#include #include #include @@ -49,3 +50,42 @@ void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) kmem_cache_free(cachep, tb); } } + +void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum) +{ + struct inet_sock *inet = inet_sk(sk); + inet->num = snum; + sk_add_bind_node(sk, &tb->owners); + inet->bind_hash = tb; +} + +EXPORT_SYMBOL(inet_bind_hash); + +/* + * Get rid of any references to a local port held by the given sock. + */ +static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + const int bhash = inet_bhashfn(inet->num, hashinfo->bhash_size); + struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet->bind_hash; + __sk_del_bind_node(sk); + inet->bind_hash = NULL; + inet->num = 0; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + spin_unlock(&head->lock); +} + +void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + local_bh_disable(); + __inet_put_port(hashinfo, sk); + local_bh_enable(); +} + +EXPORT_SYMBOL(inet_put_port); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e54a410ca70..38c04c1a754 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,10 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); -kmem_cache_t *tcp_bucket_cachep; - -EXPORT_SYMBOL_GPL(tcp_bucket_cachep); - kmem_cache_t *tcp_timewait_cachep; atomic_t tcp_orphan_count = ATOMIC_INIT(0); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 10a9b3ae344..40fe4f5fca1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,37 +104,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Caller must disable local BH processing. */ -static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) -{ - struct inet_bind_hashbucket *head = - &tcp_bhash[inet_bhashfn(inet_sk(child)->num, - tcp_bhash_size)]; - struct inet_bind_bucket *tb; - - spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; - sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; - spin_unlock(&head->lock); -} - -inline void tcp_inherit_port(struct sock *sk, struct sock *child) -{ - local_bh_disable(); - __tcp_inherit_port(sk, child); - local_bh_enable(); -} - -void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum) -{ - struct inet_sock *inet = inet_sk(sk); - inet->num = snum; - sk_add_bind_node(sk, &tb->owners); - inet->bind_hash = tb; -} - static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); @@ -248,7 +217,7 @@ tb_not_found: tb->fastreuse = 0; success: if (!inet_sk(sk)->bind_hash) - tcp_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, snum); BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; @@ -259,32 +228,6 @@ fail: return ret; } -/* Get rid of any references to a local port held by the - * given sock. - */ -static void __tcp_put_port(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct inet_bind_hashbucket *head = &tcp_bhash[inet_bhashfn(inet->num, - tcp_bhash_size)]; - struct inet_bind_bucket *tb; - - spin_lock(&head->lock); - tb = inet->bind_hash; - __sk_del_bind_node(sk); - inet->bind_hash = NULL; - inet->num = 0; - inet_bind_bucket_destroy(tcp_bucket_cachep, tb); - spin_unlock(&head->lock); -} - -void tcp_put_port(struct sock *sk) -{ - local_bh_disable(); - __tcp_put_port(sk); - local_bh_enable(); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. * Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -678,7 +621,7 @@ ok: hint += i; /* Head lock still held and bh's disabled */ - tcp_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); __tcp_v4_hash(sk, 0); @@ -1537,7 +1480,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); __tcp_v4_hash(newsk, 0); - __tcp_inherit_port(sk, newsk); + __inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -1942,7 +1885,7 @@ int tcp_v4_destroy_sock(struct sock *sk) /* Clean up a referenced TCP bind bucket. */ if (inet_sk(sk)->bind_hash) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* * If sendmsg cached page exists, toss it. @@ -2486,14 +2429,11 @@ void __init tcp_v4_init(struct net_proto_family *ops) } EXPORT_SYMBOL(ipv4_specific); -EXPORT_SYMBOL(tcp_bind_hash); EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_listen_wlock); EXPORT_SYMBOL(tcp_port_rover); EXPORT_SYMBOL(tcp_prot); -EXPORT_SYMBOL(tcp_put_port); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a8ca7ba06c1..bfbedb56bce 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -205,7 +205,7 @@ tb_not_found: success: if (!inet_sk(sk)->bind_hash) - tcp_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, snum); BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; @@ -597,7 +597,7 @@ ok: hint += i; /* Head lock still held and bh's disabled */ - tcp_bind_hash(sk, tb, port); + inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); __tcp_v6_hash(sk); @@ -1536,7 +1536,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; __tcp_v6_hash(newsk); - tcp_inherit_port(sk, newsk); + inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; -- cgit v1.2.3-70-g09d2 From 6e04e02165a7209a71db553b7bc48d68421e5ebf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:35 -0700 Subject: [INET]: Move tcp_port_rover to inet_hashinfo Also expose all of the tcp_hashinfo members, i.e. killing those tcp_ehash, etc macros, this will more clearly expose already generic functions and some that need just a bit of work to become generic, as we'll see in the upcoming changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 26 +++-------- net/ipv4/tcp.c | 42 +++++++++--------- net/ipv4/tcp_diag.c | 8 ++-- net/ipv4/tcp_ipv4.c | 101 +++++++++++++++++++++--------------------- net/ipv4/tcp_minisocks.c | 15 +++---- net/ipv6/tcp_ipv6.c | 51 +++++++++++---------- 8 files changed, 118 insertions(+), 128 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da9705525f1..da07411b36d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -117,6 +117,7 @@ struct inet_hashinfo { wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; kmem_cache_t *bind_bucket_cachep; + int port_rover; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, diff --git a/include/net/sock.h b/include/net/sock.h index 69d869e41c3..391d00b5b7b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -136,7 +136,7 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) + * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb8ff7c911..99e47695d4b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,19 +41,7 @@ #endif #include -extern struct inet_hashinfo tcp_hashinfo; -#define tcp_ehash (tcp_hashinfo.ehash) -#define tcp_bhash (tcp_hashinfo.bhash) -#define tcp_ehash_size (tcp_hashinfo.ehash_size) -#define tcp_bhash_size (tcp_hashinfo.bhash_size) -#define tcp_listening_hash (tcp_hashinfo.listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) -#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) - -extern int tcp_port_rover; +extern struct inet_hashinfo tcp_hashinfo; #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 @@ -1463,21 +1451,21 @@ extern void tcp_listen_wlock(void); /* - We may sleep inside this lock. * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&tcp_lhash_lock). + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). */ static inline void tcp_listen_lock(void) { /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_lhash_lock); - atomic_inc(&tcp_lhash_users); - read_unlock(&tcp_lhash_lock); + read_lock(&tcp_hashinfo.lhash_lock); + atomic_inc(&tcp_hashinfo.lhash_users); + read_unlock(&tcp_hashinfo.lhash_lock); } static inline void tcp_listen_unlock(void) { - if (atomic_dec_and_test(&tcp_lhash_users)) - wake_up(&tcp_lhash_wait); + if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) + wake_up(&tcp_hashinfo.lhash_wait); } static inline int keepalive_intvl_when(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 38c04c1a754..2f4b1a374bb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2257,11 +2257,11 @@ void __init tcp_init(void) __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), sizeof(skb->cb)); - tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct inet_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!tcp_bucket_cachep) + tcp_hashinfo.bind_bucket_cachep = + kmem_cache_create("tcp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!tcp_hashinfo.bind_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", @@ -2276,7 +2276,7 @@ void __init tcp_init(void) * * The methodology is similar to that of the buffer cache. */ - tcp_ehash = + tcp_hashinfo.ehash = alloc_large_system_hash("TCP established", sizeof(struct inet_ehash_bucket), thash_entries, @@ -2284,37 +2284,37 @@ void __init tcp_init(void) (25 - PAGE_SHIFT) : (27 - PAGE_SHIFT), HASH_HIGHMEM, - &tcp_ehash_size, + &tcp_hashinfo.ehash_size, NULL, 0); - tcp_ehash_size = (1 << tcp_ehash_size) >> 1; - for (i = 0; i < (tcp_ehash_size << 1); i++) { - rwlock_init(&tcp_ehash[i].lock); - INIT_HLIST_HEAD(&tcp_ehash[i].chain); + tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1; + for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) { + rwlock_init(&tcp_hashinfo.ehash[i].lock); + INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); } - tcp_bhash = + tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), - tcp_ehash_size, + tcp_hashinfo.ehash_size, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : (27 - PAGE_SHIFT), HASH_HIGHMEM, - &tcp_bhash_size, + &tcp_hashinfo.bhash_size, NULL, 64 * 1024); - tcp_bhash_size = 1 << tcp_bhash_size; - for (i = 0; i < tcp_bhash_size; i++) { - spin_lock_init(&tcp_bhash[i].lock); - INIT_HLIST_HEAD(&tcp_bhash[i].chain); + tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; + for (i = 0; i < tcp_hashinfo.bhash_size; i++) { + spin_lock_init(&tcp_hashinfo.bhash[i].lock); + INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } /* Try to be a bit smarter and adjust defaults depending * on available memory. */ for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_bhash_size * sizeof(struct inet_bind_hashbucket)); + (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4) { @@ -2329,7 +2329,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; } - tcp_port_rover = sysctl_local_port_range[0] - 1; + tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1; sysctl_tcp_mem[0] = 768 << order; sysctl_tcp_mem[1] = 1024 << order; @@ -2344,7 +2344,7 @@ void __init tcp_init(void) printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", - tcp_ehash_size << 1, tcp_bhash_size); + tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5bb6a0f1c77..0ae738b455f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -595,7 +595,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; num = 0; - sk_for_each(sk, node, &tcp_listening_hash[i]) { + sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -645,8 +645,8 @@ skip_listen_ht: if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) return skb->len; - for (i = s_i; i < tcp_ehash_size; i++) { - struct inet_ehash_bucket *head = &tcp_ehash[i]; + for (i = s_i; i < tcp_hashinfo.ehash_size; i++) { + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[i]; struct sock *sk; struct hlist_node *node; @@ -678,7 +678,7 @@ next_normal: if (r->tcpdiag_states&TCPF_TIME_WAIT) { sk_for_each(sk, node, - &tcp_ehash[i + tcp_ehash_size].chain) { + &tcp_hashinfo.ehash[i + tcp_hashinfo.ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 40fe4f5fca1..f5373f9f00a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,6 +94,7 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .lhash_users = ATOMIC_INIT(0), .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), .portalloc_lock = SPIN_LOCK_UNLOCKED, + .port_rover = 1024 - 1, }; /* @@ -102,7 +103,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { * 32768-61000 */ int sysctl_local_port_range[2] = { 1024, 4999 }; -int tcp_port_rover = 1024 - 1; static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { @@ -146,16 +146,16 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_portalloc_lock); - if (tcp_port_rover < low) + spin_lock(&tcp_hashinfo.portalloc_lock); + if (tcp_hashinfo.port_rover < low) rover = low; else - rover = tcp_port_rover; + rover = tcp_hashinfo.port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -164,8 +164,8 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); + tcp_hashinfo.port_rover = rover; + spin_unlock(&tcp_hashinfo.portalloc_lock); /* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash @@ -182,7 +182,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -205,7 +205,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -237,22 +237,22 @@ fail: void tcp_listen_wlock(void) { - write_lock(&tcp_lhash_lock); + write_lock(&tcp_hashinfo.lhash_lock); - if (atomic_read(&tcp_lhash_users)) { + if (atomic_read(&tcp_hashinfo.lhash_users)) { DEFINE_WAIT(wait); for (;;) { - prepare_to_wait_exclusive(&tcp_lhash_wait, + prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&tcp_lhash_users)) + if (!atomic_read(&tcp_hashinfo.lhash_users)) break; - write_unlock_bh(&tcp_lhash_lock); + write_unlock_bh(&tcp_hashinfo.lhash_lock); schedule(); - write_lock_bh(&tcp_lhash_lock); + write_lock_bh(&tcp_hashinfo.lhash_lock); } - finish_wait(&tcp_lhash_wait, &wait); + finish_wait(&tcp_hashinfo.lhash_wait, &wait); } } @@ -263,20 +263,20 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_lhash_lock; + list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &tcp_hashinfo.lhash_lock; tcp_listen_wlock(); } else { - sk->sk_hashent = inet_sk_ehashfn(sk, tcp_ehash_size); - list = &tcp_ehash[sk->sk_hashent].chain; - lock = &tcp_ehash[sk->sk_hashent].lock; + sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size); + list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; + lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); } __sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); write_unlock(lock); if (listen_possible && sk->sk_state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); + wake_up(&tcp_hashinfo.lhash_wait); } static void tcp_v4_hash(struct sock *sk) @@ -298,9 +298,9 @@ void tcp_unhash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { local_bh_disable(); tcp_listen_wlock(); - lock = &tcp_lhash_lock; + lock = &tcp_hashinfo.lhash_lock; } else { - struct inet_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; lock = &head->lock; write_lock_bh(&head->lock); } @@ -311,7 +311,7 @@ void tcp_unhash(struct sock *sk) ende: if (sk->sk_state == TCP_LISTEN) - wake_up(&tcp_lhash_wait); + wake_up(&tcp_hashinfo.lhash_wait); } /* Don't inline this cruft. Here are some nice properties to @@ -366,8 +366,8 @@ static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, struct sock *sk = NULL; struct hlist_head *head; - read_lock(&tcp_lhash_lock); - head = &tcp_listening_hash[inet_lhashfn(hnum)]; + read_lock(&tcp_hashinfo.lhash_lock); + head = &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -382,7 +382,7 @@ static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, sherry_cache: sock_hold(sk); } - read_unlock(&tcp_lhash_lock); + read_unlock(&tcp_hashinfo.lhash_lock); return sk; } @@ -406,8 +406,8 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_ehash_size); - head = &tcp_ehash[hash]; + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size); + head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) @@ -415,7 +415,7 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; } @@ -469,8 +469,8 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); - struct inet_ehash_bucket *head = &tcp_ehash[hash]; + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -478,7 +478,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = (struct tcp_tw_bucket *)sk2; if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { @@ -582,7 +582,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -602,7 +602,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -637,7 +637,7 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -1926,7 +1926,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_listening_hash[0]); + sk = sk_head(&tcp_hashinfo.listening_hash[0]); goto get_sk; } @@ -1980,7 +1980,7 @@ start_req: read_unlock_bh(&tp->accept_queue.syn_wait_lock); } if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_listening_hash[st->bucket]); + sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); goto get_sk; } cur = NULL; @@ -2004,7 +2004,7 @@ static void *established_get_first(struct seq_file *seq) struct tcp_iter_state* st = seq->private; void *rc = NULL; - for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { + for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -2012,8 +2012,8 @@ static void *established_get_first(struct seq_file *seq) /* We can reschedule _before_ having picked the target: */ cond_resched_softirq(); - read_lock(&tcp_ehash[st->bucket].lock); - sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { + read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; } @@ -2022,14 +2022,14 @@ static void *established_get_first(struct seq_file *seq) } st->state = TCP_SEQ_STATE_TIME_WAIT; tw_for_each(tw, node, - &tcp_ehash[st->bucket + tcp_ehash_size].chain) { + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { if (tw->tw_family != st->family) { continue; } rc = tw; goto out; } - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2056,15 +2056,15 @@ get_tw: cur = tw; goto out; } - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; /* We can reschedule between buckets: */ cond_resched_softirq(); - if (++st->bucket < tcp_ehash_size) { - read_lock(&tcp_ehash[st->bucket].lock); - sk = sk_head(&tcp_ehash[st->bucket].chain); + if (++st->bucket < tcp_hashinfo.ehash_size) { + read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; goto out; @@ -2078,7 +2078,7 @@ get_tw: } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); + tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); goto get_tw; found: cur = sk; @@ -2173,7 +2173,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock(&tcp_ehash[st->bucket].lock); + read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); local_bh_enable(); break; } @@ -2432,7 +2432,6 @@ EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_listen_wlock); -EXPORT_SYMBOL(tcp_port_rover); EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 267cea1087e..f29e2f6ebe1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,12 +60,11 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - struct inet_ehash_bucket *ehead; struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; - /* Unlink from established hashes. */ - ehead = &tcp_ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[tw->tw_hashent]; + write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { write_unlock(&ehead->lock); @@ -76,12 +75,12 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &tcp_bhash[inet_bhashfn(tw->tw_num, tcp_bhash_size)]; + bhead = &tcp_hashinfo.bhash[inet_bhashfn(tw->tw_num, tcp_hashinfo.bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; - inet_bind_bucket_destroy(tcp_bucket_cachep, tb); + inet_bind_bucket_destroy(tcp_hashinfo.bind_bucket_cachep, tb); spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG @@ -297,13 +296,13 @@ kill: static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { const struct inet_sock *inet = inet_sk(sk); - struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &tcp_bhash[inet_bhashfn(inet->num, tcp_bhash_size)]; + bhead = &tcp_hashinfo.bhash[inet_bhashfn(inet->num, tcp_hashinfo.bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = inet->bind_hash; BUG_TRAP(inet->bind_hash); @@ -317,7 +316,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->sk_prot); /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - tw_add_node(tw, &(ehead + tcp_ehash_size)->chain); + tw_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bfbedb56bce..362ef5a6406 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -84,7 +84,7 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent>>16; hashent ^= hashent>>8; - return (hashent & (tcp_ehash_size - 1)); + return (hashent & (tcp_hashinfo.ehash_size - 1)); } static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) @@ -138,15 +138,15 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_portalloc_lock); - if (tcp_port_rover < low) + spin_lock(&tcp_hashinfo.portalloc_lock); + if (tcp_hashinfo.port_rover < low) rover = low; else - rover = tcp_port_rover; + rover = tcp_hashinfo.port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -155,8 +155,8 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_port_rover = rover; - spin_unlock(&tcp_portalloc_lock); + tcp_hashinfo.port_rover = rover; + spin_unlock(&tcp_hashinfo.portalloc_lock); /* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash @@ -171,7 +171,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) /* OK, here is the one we will use. */ snum = rover; } else { - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -192,8 +192,11 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) - goto fail_unlock; + if (tb == NULL) { + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); + if (tb == NULL) + goto fail_unlock; + } if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) tb->fastreuse = 1; @@ -224,13 +227,13 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_lhash_lock; + list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &tcp_hashinfo.lhash_lock; tcp_listen_wlock(); } else { sk->sk_hashent = tcp_v6_sk_hashfn(sk); - list = &tcp_ehash[sk->sk_hashent].chain; - lock = &tcp_ehash[sk->sk_hashent].lock; + list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; + lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); } @@ -263,8 +266,8 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; - read_lock(&tcp_lhash_lock); - sk_for_each(sk, node, &tcp_listening_hash[inet_lhashfn(hnum)]) { + read_lock(&tcp_hashinfo.lhash_lock); + sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -291,7 +294,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor } if (result) sock_hold(result); - read_unlock(&tcp_lhash_lock); + read_unlock(&tcp_hashinfo.lhash_lock); return result; } @@ -315,7 +318,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u * have wildcards anyways. */ hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - head = &tcp_ehash[hash]; + head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ @@ -323,7 +326,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { /* FIXME: acme: check this... */ struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; @@ -461,7 +464,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -469,7 +472,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { + sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = (struct tcp_tw_bucket*)sk2; if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -558,7 +561,7 @@ static int tcp_v6_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -578,7 +581,7 @@ static int tcp_v6_hash_connect(struct sock *sk) } } - tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); + tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -613,7 +616,7 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); -- cgit v1.2.3-70-g09d2 From f3f05f7046e7c85b04af390d95a82a27160dd5d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:09 -0700 Subject: [INET]: Generalise the tcp_listen_ lock routines Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 48 +++++++++++++++++++++++++++++ include/net/tcp.h | 21 ------------- net/ipv4/inet_hashtables.c | 32 ++++++++++++++++++++ net/ipv4/tcp_diag.c | 8 ++--- net/ipv4/tcp_ipv4.c | 70 ++++++------------------------------------- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 94 insertions(+), 87 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da07411b36d..f5d65121f7b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,10 +19,14 @@ #include #include #include +#include /* only for TCP_LISTEN, damn :-( */ #include +#include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table, extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); +extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); + +/* + * - We may sleep inside this lock. + * - If sleeping is not required (or called from BH), + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). + */ +static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) +{ + /* read_lock synchronizes to candidates to writers */ + read_lock(&hashinfo->lhash_lock); + atomic_inc(&hashinfo->lhash_users); + read_unlock(&hashinfo->lhash_lock); +} + +static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) +{ + if (atomic_dec_and_test(&hashinfo->lhash_users)) + wake_up(&hashinfo->lhash_wait); +} + +static inline void __inet_hash(struct inet_hashinfo *hashinfo, + struct sock *sk, const int listen_possible) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + if (listen_possible && sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); + list = &hashinfo->ehash[sk->sk_hashent].chain; + lock = &hashinfo->ehash[sk->sk_hashent].lock; + write_lock(lock); + } + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); + if (listen_possible && sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 99e47695d4b..bc110cc7022 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, extern void tcp_enter_memory_pressure(void); -extern void tcp_listen_wlock(void); - -/* - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). - */ - -static inline void tcp_listen_lock(void) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_hashinfo.lhash_lock); - atomic_inc(&tcp_hashinfo.lhash_users); - read_unlock(&tcp_hashinfo.lhash_lock); -} - -static inline void tcp_listen_unlock(void) -{ - if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) - wake_up(&tcp_hashinfo.lhash_wait); -} - static inline int keepalive_intvl_when(const struct tcp_sock *tp) { return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 33d6cbe32cd..06cbc6f689c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,7 +15,9 @@ #include #include +#include #include +#include #include @@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) } EXPORT_SYMBOL(inet_put_port); + +/* + * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. + * Look, when several writers sleep and reader wakes them up, all but one + * immediately hit write lock and grab all the cpus. Exclusive sleep solves + * this, _but_ remember, it adds useless work on UP machines (wake up each + * exclusive lock release). It should be ifdefed really. + */ +void inet_listen_wlock(struct inet_hashinfo *hashinfo) +{ + write_lock(&hashinfo->lhash_lock); + + if (atomic_read(&hashinfo->lhash_users)) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait_exclusive(&hashinfo->lhash_wait, + &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_read(&hashinfo->lhash_users)) + break; + write_unlock_bh(&hashinfo->lhash_lock); + schedule(); + write_lock_bh(&hashinfo->lhash_lock); + } + + finish_wait(&hashinfo->lhash_wait, &wait); + } +} + +EXPORT_SYMBOL(inet_listen_wlock); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0ae738b455f..1a89a03c449 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[0] == 0) { if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; - tcp_listen_lock(); + inet_listen_lock(&tcp_hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; @@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (tcpdiag_dump_sock(skb, sk, cb) < 0) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); goto done; } @@ -622,7 +622,7 @@ syn_recv: goto next_listen; if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); goto done; } @@ -636,7 +636,7 @@ next_listen: cb->args[3] = 0; cb->args[4] = 0; } - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f5373f9f00a..5f9ad95304c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -228,62 +228,11 @@ fail: return ret; } -/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ - -void tcp_listen_wlock(void) -{ - write_lock(&tcp_hashinfo.lhash_lock); - - if (atomic_read(&tcp_hashinfo.lhash_users)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&tcp_hashinfo.lhash_users)) - break; - write_unlock_bh(&tcp_hashinfo.lhash_lock); - schedule(); - write_lock_bh(&tcp_hashinfo.lhash_lock); - } - - finish_wait(&tcp_hashinfo.lhash_wait, &wait); - } -} - -static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) -{ - struct hlist_head *list; - rwlock_t *lock; - - BUG_TRAP(sk_unhashed(sk)); - if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_hashinfo.lhash_lock; - tcp_listen_wlock(); - } else { - sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; - write_lock(lock); - } - __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock(lock); - if (listen_possible && sk->sk_state == TCP_LISTEN) - wake_up(&tcp_hashinfo.lhash_wait); -} - static void tcp_v4_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __tcp_v4_hash(sk, 1); + __inet_hash(&tcp_hashinfo, sk, 1); local_bh_enable(); } } @@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { local_bh_disable(); - tcp_listen_wlock(); + inet_listen_wlock(&tcp_hashinfo); lock = &tcp_hashinfo.lhash_lock; } else { struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; @@ -624,7 +573,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); - __tcp_v4_hash(sk, 0); + __inet_hash(&tcp_hashinfo, sk, 0); } spin_unlock(&head->lock); @@ -641,7 +590,7 @@ ok: tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __tcp_v4_hash(sk, 0); + __inet_hash(&tcp_hashinfo, sk, 0); spin_unlock_bh(&head->lock); return 0; } else { @@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(newsk); - __tcp_v4_hash(newsk, 0); + __inet_hash(&tcp_hashinfo, newsk, 0); __inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) void *rc; struct tcp_iter_state* st = seq->private; - tcp_listen_lock(); + inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); @@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); @@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - tcp_listen_unlock(); + inet_listen_unlock(&tcp_hashinfo); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: @@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops) EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_listen_wlock); EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 362ef5a6406..93a66b9a76e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) if (sk->sk_state == TCP_LISTEN) { list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_hashinfo.lhash_lock; - tcp_listen_wlock(); + inet_listen_wlock(&tcp_hashinfo); } else { sk->sk_hashent = tcp_v6_sk_hashfn(sk); list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; -- cgit v1.2.3-70-g09d2 From c752f0739f09b803aed191c4765a3b6650a08653 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:28 -0700 Subject: [TCP]: Move the tcp sock states to net/tcp_states.h Lots of places just needs the states, not even linux/tcp.h, where this enum was, needs it. This speeds up development of the refactorings as less sources are rebuilt when things get moved from net/tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- fs/smbfs/sock.c | 2 +- include/linux/tcp.h | 18 ------------------ include/net/dn.h | 1 + include/net/inet_hashtables.h | 2 +- include/net/ip6_route.h | 1 - include/net/ip_vs.h | 1 - include/net/sctp/constants.h | 2 +- include/net/tcp.h | 2 ++ include/net/tcp_states.h | 34 ++++++++++++++++++++++++++++++++++ net/appletalk/ddp.c | 2 +- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_ds_in.c | 2 +- net/ax25/ax25_ds_timer.c | 2 +- net/ax25/ax25_in.c | 2 +- net/ax25/ax25_std_in.c | 2 +- net/ax25/ax25_std_timer.c | 2 +- net/ax25/ax25_subr.c | 2 +- net/core/datagram.c | 6 +++--- net/decnet/af_decnet.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 1 + net/ipv4/protocol.c | 1 - net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 3 ++- net/ipv6/datagram.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/udp.c | 1 + net/ipx/af_ipx.c | 2 +- net/ipx/ipx_proc.c | 2 +- net/irda/af_irda.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 2 +- net/llc/llc_if.c | 2 +- net/llc/llc_sap.c | 2 +- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 2 +- net/netrom/nr_subr.c | 2 +- net/netrom/nr_timer.c | 2 +- net/rose/af_rose.c | 2 +- net/rose/rose_in.c | 2 +- net/rose/rose_route.c | 2 +- net/rose/rose_subr.c | 2 +- net/rose/rose_timer.c | 2 +- net/sctp/ipv6.c | 2 +- net/sunrpc/svcsock.c | 3 +-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 2 +- net/wanrouter/af_wanpipe.c | 2 +- net/x25/af_x25.c | 2 +- net/x25/x25_in.c | 2 +- net/x25/x25_subr.c | 2 +- net/x25/x25_timer.c | 2 +- 53 files changed, 86 insertions(+), 66 deletions(-) create mode 100644 include/net/tcp_states.h (limited to 'net/ipv6') diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index 93f3cd22a2e..6815b1b12b6 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -15,12 +15,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e70ab19652d..b88fe05fdcb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,24 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; - -enum { - TCP_ESTABLISHED = 1, - TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, /* now a valid state */ - - TCP_MAX_STATES /* Leave at the end! */ -}; - -#define TCP_STATE_MASK 0xF #define TCP_ACTION_FIN (1 << 7) enum { diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db39..c1dbbd22279 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,6 +3,7 @@ #include #include +#include #include typedef unsigned short dn_address; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f5d65121f7b..c816708fa55 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,11 +19,11 @@ #include #include #include -#include /* only for TCP_LISTEN, damn :-( */ #include #include #include +#include #include diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526..1f2e428ca36 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617..7a3c43711a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -255,7 +255,6 @@ struct ip_vs_daemon_user { #include /* for struct atomic_t */ #include /* for struct neighbour */ #include /* for struct dst_entry */ -#include #include #include diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bb..c51541ee024 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -47,10 +47,10 @@ #ifndef __sctp_constants_h__ #define __sctp_constants_h__ -#include /* For TCP states used in sctp_sock_state_t */ #include #include /* For ipv6hdr. */ #include +#include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; diff --git a/include/net/tcp.h b/include/net/tcp.h index bc110cc7022..9d026d81d8c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -36,6 +36,8 @@ #include #include #include +#include + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) #include #endif diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 00000000000..b9d4176b2d1 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_STATE_MASK 0xF + +#endif /* _LINUX_TCP_STATES_H */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ffde33cd09b..1d31b3a3f1e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -53,12 +53,12 @@ #include #include -#include #include #include /* For TIOCOUTQ/INQ */ #include #include #include +#include #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a5c94f11547..ea43dfb774e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 8adc0022cf5..5d0f8fb9d7a 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -23,7 +23,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 3a8b67316fc..061083efc1d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 124eec8216d..0357705d575 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include /* For arp_rcv */ #include #include diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 7131873322c..83a33387c06 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -30,7 +30,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 066897bc074..a29c480a4dc 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index eb7343c10a9..c41dbe5fade 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/datagram.c b/net/core/datagram.c index fcee054b6f7..da9bf71421a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -51,9 +50,10 @@ #include #include -#include -#include +#include +#include +#include /* * Is a socket 'connection oriented' ? diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bd49dd97a09..621680f127a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -118,7 +118,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 202dbde9850..369f25b60f3 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b1db561f254..3fd49f4282a 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d9212addd19..6e092dadb38 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0db405a869f..291831e792a 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e222c5c26b3..304bb0a1d4f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc4d07357e3..a8135e1f528 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -95,7 +95,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5229365cd8b..761984f3bd9 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a082646e6f1..766e1c7179a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eff050ac704..2ffe34cc2ef 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 3a13c5d1d4d..39d5939ccd6 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include #include diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index b6761913445..1f73d9ea434 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92c6e8d4e73..6f92f9c6299 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -56,7 +56,7 @@ #include #include -#include +#include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f49b82da826..66f55e514b5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -23,13 +23,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include /* remember: uninitialized global data is zeroed because its in .bss */ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 57154861946..4c644bc70ea 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f9fc48aeaf..0f84f66018e 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include u8 llc_mac_null_var[IFHWADDRLEN]; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 965c94eb4bb..34228ef1498 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /** diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 162a85fed15..9aa8b14a81a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 9c44b379412..2fcba9e24b2 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include /* For ip_rcv */ #include #include diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 252c1b3ecd7..587bed2674b 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index faabda8088b..75b72d389ba 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5480caf8ccc..c6e59f84c3a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index ef475a1bb1b..a52417bd0a1 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 25da6f699fd..4510cd7613e 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index ae135e27799..a29a3a960fd 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 84dd4403f79..50ae0371dab 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4a6421a9fca..fa3be2b8fb5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -66,8 +66,8 @@ #include #include -#include #include +#include #include #include #include diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d0c3120d023..e750cb685cb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d403e34088a..bc4c44552c1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -105,7 +105,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 46252d2807b..6ffc64e1712 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -76,11 +76,11 @@ #include #include #include -#include #include #include #include +#include /* Internal data structures and random procedures: */ diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index d93b19faaab..596cb96e5f4 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 04bec047fa9..020d73cc841 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* For TIOCINQ/OUTQ */ diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b0197c70a9f..26146874b83 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index e20cfadad4d..8be9b8fbc24 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d6a21a3ad80..0a92e1da392 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include static void x25_heartbeat_expiry(unsigned long); -- cgit v1.2.3-70-g09d2 From 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:30 -0700 Subject: [INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets This paves the way to generalise the rest of the sock ID lookup routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro kernels (where IPv6 is always built as a module): [root@qemu ~]# grep tw_sock /proc/slabinfo tw_sock_TCPv6 0 0 128 31 1 tw_sock_TCP 0 0 96 41 1 [root@qemu ~]# Now if a protocol wants to use the TIME_WAIT generic infrastructure it only has to set the sk_prot->twsk_obj_size field with the size of its inet_timewait_sock derived sock and proto_register will create sk_prot->twsk_slab, for now its only for INET sockets, but we can introduce timewait_sock later if some non INET transport protocolo wants to use this stuff. Next changesets will take advantage of this new infrastructure to generalise even more TCP code. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o /tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o [acme@toy net-2.6.14]$ Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1 (qemu host)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 52 +++++++++- include/linux/tcp.h | 15 +++ include/net/inet_hashtables.h | 41 ++++++++ include/net/inet_timewait_sock.h | 142 +++++++++++++++++++++++++++ include/net/sock.h | 17 ++-- include/net/tcp.h | 202 +-------------------------------------- net/core/sock.c | 35 ++++++- net/ipv4/tcp.c | 10 -- net/ipv4/tcp_diag.c | 10 +- net/ipv4/tcp_ipv4.c | 107 +++++++++++---------- net/ipv4/tcp_minisocks.c | 142 ++++++++++++++------------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 100 ++++++++++--------- 13 files changed, 484 insertions(+), 391 deletions(-) create mode 100644 include/net/inet_timewait_sock.h (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade2..98fa32316e4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -308,6 +308,41 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) + +#include + +struct tcp6_timewait_sock { + struct tcp_timewait_sock tw_v6_sk; + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + +static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +{ + return (struct tcp6_timewait_sock *)sk; +} + +static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; +} + +static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +{ + return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; +} + +static inline int tcp_twsk_ipv6only(const struct sock *sk) +{ + return inet_twsk(sk)->tw_ipv6only; +} + +static inline int tcp_v6_ipv6only(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); +} #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 @@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#endif +#define __tcp_v6_rcv_saddr(__sk) NULL +#define tcp_v6_rcv_saddr(__sk) NULL +#define tcp_twsk_ipv6only(__sk) 0 +#define tcp_v6_ipv6only(__sk) 0 +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#endif +#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ + ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ + ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif +#endif /* __KERNEL__ */ + +#endif /* _IPV6_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b88fe05fdcb..5d295b1b3de 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -179,6 +179,7 @@ struct tcp_info #include #include #include +#include /* This defines a selective acknowledgement block. */ struct tcp_sack_block { @@ -387,6 +388,20 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) return (struct tcp_sock *)sk; } +struct tcp_timewait_sock { + struct inet_timewait_sock tw_sk; + __u32 tw_rcv_nxt; + __u32 tw_snd_nxt; + __u32 tw_rcv_wnd; + __u32 tw_ts_recent; + long tw_ts_recent_stamp; +}; + +static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) +{ + return (struct tcp_timewait_sock *)sk; +} + static inline void *tcp_ca(const struct tcp_sock *tp) { return (void *) tp->ca_priv; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1c4fa0065a8..c38c637e073 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,6 +14,8 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include + #include #include #include @@ -310,4 +312,43 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } + +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport) << 16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport) << 16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 00000000000..ce117048f2f --- /dev/null +++ b/include/net/inet_timewait_sock.h @@ -0,0 +1,142 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for a generic INET TIMEWAIT sock + * + * From code originally in net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_TIMEWAIT_SOCK_ +#define _INET_TIMEWAIT_SOCK_ + +#include + +#include +#include + +#include +#include + +#include + +#if (BITS_PER_LONG == 64) +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 +#else +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 +#endif + +struct inet_bind_bucket; + +/* + * This is a TIME_WAIT sock. It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct inet_timewait_sock { + /* + * Now struct sock also uses sock_common, so please just + * don't add nothing before this first member (__tw_common) --acme + */ + struct sock_common __tw_common; +#define tw_family __tw_common.skc_family +#define tw_state __tw_common.skc_state +#define tw_reuse __tw_common.skc_reuse +#define tw_bound_dev_if __tw_common.skc_bound_dev_if +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node +#define tw_refcnt __tw_common.skc_refcnt +#define tw_prot __tw_common.skc_prot + volatile unsigned char tw_substate; + /* 3 bits hole, try to pack */ + unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_sock */ + __u16 tw_sport; + __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __u32 tw_rcv_saddr; + __u16 tw_dport; + __u16 tw_num; + /* And these are ours. */ + __u8 tw_ipv6only:1; + /* 31 bits hole, try to pack */ + int tw_hashent; + int tw_timeout; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; +}; + +static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + __hlist_del(&tw->tw_death_node); + inet_twsk_dead_node_init(tw); +} + +static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + if (inet_twsk_dead_hashed(tw)) { + __inet_twsk_del_dead_node(tw); + return 1; + } + return 0; +} + +#define inet_twsk_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define inet_twsk_for_each_inmate(tw, node, jail) \ + hlist_for_each_entry(tw, node, jail, tw_death_node) + +#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + +static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) +{ + return (struct inet_timewait_sock *)sk; +} + +static inline u32 inet_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +} + +static inline void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) { +#ifdef SOCK_REFCNT_DEBUG + printk(KERN_DEBUG "%s timewait_sock %p released\n", + tw->tw_prot->name, tw); +#endif + kmem_cache_free(tw->tw_prot->twsk_slab, tw); + } +} +#endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index 391d00b5b7b..c902c57bf2b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ } while(0) struct sock; +struct proto; /** * struct sock_common - minimal network layer representation of sockets @@ -98,10 +99,11 @@ struct sock; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header - * for struct sock and struct tcp_tw_bucket. - */ + * for struct sock and struct inet_timewait_sock. + */ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; @@ -110,11 +112,12 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + struct proto *skc_prot; }; /** * struct sock - network layer representation of sockets - * @__sk_common: shared layout with tcp_tw_bucket + * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer @@ -140,7 +143,6 @@ struct sock_common { * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used - * @sk_prot: protocol handlers inside a network family * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' @@ -173,7 +175,7 @@ struct sock_common { */ struct sock { /* - * Now struct tcp_tw_bucket also uses sock_common, so please just + * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; @@ -184,6 +186,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -218,7 +221,6 @@ struct sock { struct sk_buff *tail; } sk_backlog; struct sk_buff_head sk_error_queue; - struct proto *sk_prot; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, @@ -557,6 +559,9 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + struct request_sock_ops *rsk_prot; struct module *owner; diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c..cf8e664176a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -38,207 +38,14 @@ #include #include -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -#include -#endif #include extern struct inet_hashinfo tcp_hashinfo; -#if (BITS_PER_LONG == 64) -#define TCP_ADDRCMP_ALIGN_BYTES 8 -#else -#define TCP_ADDRCMP_ALIGN_BYTES 4 -#endif - -/* This is a TIME_WAIT bucket. It works around the memory consumption - * problems of sockets in such a state on heavily loaded servers, but - * without violating the protocol specification. - */ -struct tcp_tw_bucket { - /* - * Now struct sock also uses sock_common, so please just - * don't add nothing before this first member (__tw_common) --acme - */ - struct sock_common __tw_common; -#define tw_family __tw_common.skc_family -#define tw_state __tw_common.skc_state -#define tw_reuse __tw_common.skc_reuse -#define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node -#define tw_bind_node __tw_common.skc_bind_node -#define tw_refcnt __tw_common.skc_refcnt - volatile unsigned char tw_substate; - unsigned char tw_rcv_wscale; - __u16 tw_sport; - /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ - __u32 tw_daddr - __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; - __u16 tw_num; - /* And these are ours. */ - int tw_hashent; - int tw_timeout; - __u32 tw_rcv_nxt; - __u32 tw_snd_nxt; - __u32 tw_rcv_wnd; - __u32 tw_ts_recent; - long tw_ts_recent_stamp; - unsigned long tw_ttd; - struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; - int tw_v6_ipv6only; -#endif -}; - -static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_node, list); -} - -static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) -{ - return tw->tw_death_node.pprev != NULL; -} - -static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - __hlist_del(&tw->tw_death_node); - tw_dead_node_init(tw); -} - -static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - if (tw_dead_hashed(tw)) { - __tw_del_dead_node(tw); - return 1; - } - return 0; -} - -#define tw_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) - -#define tw_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) - -#define tw_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) - -#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) - -static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; -} - -#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) - -static inline int tcp_v6_ipv6only(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); -} -#else -# define __tcp_v6_rcv_saddr(__sk) NULL -# define tcp_v6_rcv_saddr(__sk) NULL -# define tcptw_sk_ipv6only(__sk) 0 -# define tcp_v6_ipv6only(__sk) 0 -#endif - -extern kmem_cache_t *tcp_timewait_cachep; - -static inline void tcp_tw_put(struct tcp_tw_bucket *tw) -{ - if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "tw_bucket %p released\n", tw); -#endif - kmem_cache_free(tcp_timewait_cachep, tw); - } -} - extern atomic_t tcp_orphan_count; extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); - - -/* Socket demux engine toys. */ -#ifdef __BIG_ENDIAN -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport)<<16) | (__u32)(__dport)) -#else /* __LITTLE_ENDIAN */ -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport)<<16) | (__u32)(__sport)) -#endif - -#if (BITS_PER_LONG == 64) -#ifdef __BIG_ENDIAN -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); -#else /* __LITTLE_ENDIAN */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); -#endif /* __BIG_ENDIAN */ -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#else /* 32-bit arch */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ - (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif /* 64-bit arch */ - -#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); extern int tcp_v4_remember_stamp(struct sock *sk); -extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); @@ -616,10 +423,9 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, diff --git a/net/core/sock.c b/net/core/sock.c index a1a23be10aa..aba31fedf2a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1378,7 +1378,8 @@ static LIST_HEAD(proto_list); int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name; + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; int rc = -ENOBUFS; if (alloc_slab) { @@ -1409,6 +1410,23 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab_name; } } + + if (prot->twsk_obj_size) { + static const char mask[] = "tw_sock_%s"; + + timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + + if (timewait_sock_slab_name == NULL) + goto out_free_request_sock_slab; + + sprintf(timewait_sock_slab_name, mask, prot->name); + prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, + prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (prot->twsk_slab == NULL) + goto out_free_timewait_sock_slab_name; + } } write_lock(&proto_list_lock); @@ -1417,6 +1435,13 @@ int proto_register(struct proto *prot, int alloc_slab) rc = 0; out: return rc; +out_free_timewait_sock_slab_name: + kfree(timewait_sock_slab_name); +out_free_request_sock_slab: + if (prot->rsk_prot && prot->rsk_prot->slab) { + kmem_cache_destroy(prot->rsk_prot->slab); + prot->rsk_prot->slab = NULL; + } out_free_request_sock_slab_name: kfree(request_sock_slab_name); out_free_sock_slab: @@ -1444,6 +1469,14 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } + if (prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_slab); + + kmem_cache_destroy(prot->twsk_slab); + kfree(name); + prot->twsk_slab = NULL; + } + list_del(&prot->node); write_unlock(&proto_list_lock); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2f4b1a374bb..f1a708bf7a9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,8 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); -kmem_cache_t *tcp_timewait_cachep; - atomic_t tcp_orphan_count = ATOMIC_INIT(0); int sysctl_tcp_mem[3]; @@ -2264,13 +2262,6 @@ void __init tcp_init(void) if (!tcp_hashinfo.bind_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); - tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", - sizeof(struct tcp_tw_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!tcp_timewait_cachep) - panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); - /* Size and allocate the main established and bind bucket * hash tables. * @@ -2363,4 +2354,3 @@ EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); EXPORT_SYMBOL(tcp_statistics); -EXPORT_SYMBOL(tcp_timewait_cachep); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 1a89a03c449..6f2d6f2276b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -81,7 +81,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); if (r->tcpdiag_state == TCP_TIME_WAIT) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); long tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; @@ -99,10 +99,12 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_inode = 0; #ifdef CONFIG_IP_TCPDIAG_IPV6 if (r->tcpdiag_family == AF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, - &tw->tw_v6_rcv_saddr); + &tcp6tw->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, - &tw->tw_v6_daddr); + &tcp6tw->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -239,7 +241,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) out: if (sk) { if (sk->sk_state == TCP_TIME_WAIT) - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); else sock_put(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a678709b36f..ce423e48ebe 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -106,7 +106,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); + const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -119,7 +119,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; @@ -251,10 +251,10 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, const int dif) { struct inet_ehash_bucket *head; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - struct hlist_node *node; + const struct hlist_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -262,13 +262,13 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; @@ -313,27 +313,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) /* called with local bh disabled */ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); u32 daddr = inet->rcv_saddr; u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket *)sk2; + tw = inet_twsk(sk2); - if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); /* With PAWS, it is safe from the viewpoint @@ -350,15 +351,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, fall back to VJ's scheme and use initial timestamp retrieved from peer table. */ - if (tw->tw_ts_recent_stamp && + if (tcptw->tw_ts_recent_stamp && (!twp || (sysctl_tcp_tw_reuse && xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { - if ((tp->write_seq = - tw->tw_snd_nxt + 65535 + 2) == 0) + tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -369,7 +370,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -392,7 +393,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -429,7 +430,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + connect_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -482,7 +483,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -757,7 +758,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1002,12 +1003,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1368,7 +1370,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket *)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1557,25 +1559,25 @@ discard_and_relse: do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } - switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); if (sk2) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -1639,18 +1641,18 @@ int tcp_v4_remember_stamp(struct sock *sk) return 0; } -int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) +int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = NULL; - - peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); if (peer) { - if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || + const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && - peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; - peer->tcp_ts = tw->tw_ts_recent; + peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; } inet_putpeer(peer); return 1; @@ -1758,13 +1760,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) { return hlist_empty(head) ? NULL : - list_entry(head->first, struct tcp_tw_bucket, tw_node); + list_entry(head->first, struct inet_timewait_sock, tw_node); } -static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) +static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { return tw->tw_node.next ? hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; @@ -1860,7 +1862,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; struct hlist_node *node; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; /* We can reschedule _before_ having picked the target: */ cond_resched_softirq(); @@ -1874,8 +1876,8 @@ static void *established_get_first(struct seq_file *seq) goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { if (tw->tw_family != st->family) { continue; } @@ -1892,7 +1894,7 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; struct tcp_iter_state* st = seq->private; @@ -2159,7 +2161,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } -static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) +static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { unsigned int dest, src; __u16 destp, srcp; @@ -2261,6 +2263,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .twsk_obj_size = sizeof(struct tcp_timewait_sock), .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f29e2f6ebe1..5b5a49335fb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -41,7 +41,7 @@ int sysctl_tcp_max_tw_buckets = NR_FILE*2; int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); +static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -58,7 +58,7 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ -static void tcp_timewait_kill(struct tcp_tw_bucket *tw) +static void tcp_timewait_kill(struct inet_timewait_sock *tw) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -85,11 +85,11 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, - atomic_read(&tw->tw_refcnt)); + printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", + tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - tcp_tw_put(tw); + inet_twsk_put(tw); } /* @@ -121,19 +121,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) * to avoid misread sequence numbers, states etc. --ANK */ enum tcp_tw_status -tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, - struct tcphdr *th, unsigned len) +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, + const struct tcphdr *th) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; int paws_reject = 0; tmp_opt.saw_tstamp = 0; - if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { + if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tcp_parse_options(skb, &tmp_opt, 0); if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = tw->tw_ts_recent; - tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tmp_opt.ts_recent = tcptw->tw_ts_recent; + tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_check(&tmp_opt, th->rst); } } @@ -144,20 +145,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, /* Out of window, send ACK */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tw->tw_rcv_nxt, - tw->tw_rcv_nxt + tw->tw_rcv_wnd)) + tcptw->tw_rcv_nxt, + tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) return TCP_TW_ACK; if (th->rst) goto kill; - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || + if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -165,19 +166,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_RST; } /* FIN arrived, enter true time-wait state. */ - tw->tw_substate = TCP_TIME_WAIT; - tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tw->tw_substate = TCP_TIME_WAIT; + tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent_stamp = xtime.tv_sec; - tw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } /* I am shamed, but failed to make it more elegant. @@ -186,7 +187,7 @@ kill_with_rst: * do not undertsnad recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && - sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp && + sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) tcp_tw_schedule(tw, tw->tw_timeout); else @@ -212,7 +213,7 @@ kill_with_rst: */ if (!paws_reject && - (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt && + (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { /* In window segment, it may be only reset or bare ack. */ @@ -224,18 +225,18 @@ kill_with_rst: if (sysctl_tcp_rfc1337 == 0) { kill: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } } tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent = tmp_opt.rcv_tsval; - tw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -257,9 +258,10 @@ kill: */ if (th->syn && !th->rst && !th->ack && !paws_reject && - (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || - (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { - u32 isn = tw->tw_snd_nxt + 65535 + 2; + (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || + (tmp_opt.saw_tstamp && + (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { + u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; TCP_SKB_CB(skb)->when = isn; @@ -284,7 +286,7 @@ kill: */ return TCP_TW_ACK; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -293,7 +295,7 @@ kill: * relevant info into it from the SK, and mess with hash chains * and list linkage. */ -static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) +static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw) { const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; @@ -306,7 +308,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) spin_lock(&bhead->lock); tw->tw_tb = inet->bind_hash; BUG_TRAP(inet->bind_hash); - tw_add_bind_node(tw, &tw->tw_tb->owners); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); write_lock(&ehead->lock); @@ -316,7 +318,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->sk_prot); /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - tw_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); + inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); @@ -327,19 +329,23 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct tcp_tw_bucket *tw = NULL; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw = NULL; + const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tp->af_specific->remember_stamp(sk); if (tcp_tw_count < sysctl_tcp_max_tw_buckets) - tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC); + + if (tw != NULL) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + const struct inet_sock *inet = inet_sk(sk); + const int rto = (tp->rto << 2) - (tp->rto >> 1); - if(tw != NULL) { - struct inet_sock *inet = inet_sk(sk); - int rto = (tp->rto<<2) - (tp->rto>>1); + /* Remember our protocol */ + tw->tw_prot = sk->sk_prot_creator; /* Give us an identity. */ tw->tw_daddr = inet->daddr; @@ -356,25 +362,23 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) atomic_set(&tw->tw_refcnt, 1); tw->tw_hashent = sk->sk_hashent; - tw->tw_rcv_nxt = tp->rcv_nxt; - tw->tw_snd_nxt = tp->snd_nxt; - tw->tw_rcv_wnd = tcp_receive_window(tp); - tw->tw_ts_recent = tp->rx_opt.ts_recent; - tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; - tw_dead_node_init(tw); + tcptw->tw_rcv_nxt = tp->rcv_nxt; + tcptw->tw_snd_nxt = tp->snd_nxt; + tcptw->tw_rcv_wnd = tcp_receive_window(tp); + tcptw->tw_ts_recent = tp->rx_opt.ts_recent; + tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + inet_twsk_dead_node_init(tw); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr); - tw->tw_v6_ipv6only = np->ipv6only; - } else { - memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr)); - memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr)); - tw->tw_v6_ipv6only = 0; - } + ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6only = np->ipv6only; + } else + tw->tw_ipv6only = 0; #endif /* Linkage updates. */ __tcp_tw_hashdance(sk, tw); @@ -392,7 +396,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } tcp_tw_schedule(tw, timeo); - tcp_tw_put(tw); + inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than @@ -427,7 +431,7 @@ static u32 twkill_thread_slots; /* Returns non-zero if quota exceeded. */ static int tcp_do_twkill_work(int slot, unsigned int quota) { - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; unsigned int killed; int ret; @@ -441,11 +445,11 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) killed = 0; ret = 0; rescan: - tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { - __tw_del_dead_node(tw); + inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { + __inet_twsk_del_dead_node(tw); spin_unlock(&tw_death_lock); tcp_timewait_kill(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); killed++; spin_lock(&tw_death_lock); if (killed > quota) { @@ -531,11 +535,11 @@ static void twkill_work(void *dummy) */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void tcp_tw_deschedule(struct tcp_tw_bucket *tw) +void tcp_tw_deschedule(struct inet_timewait_sock *tw) { spin_lock(&tw_death_lock); - if (tw_del_dead_node(tw)) { - tcp_tw_put(tw); + if (inet_twsk_del_dead_node(tw)) { + inet_twsk_put(tw); if (--tcp_tw_count == 0) del_timer(&tcp_tw_timer); } @@ -552,7 +556,7 @@ static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) +static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) { struct hlist_head *list; int slot; @@ -586,7 +590,7 @@ static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) spin_lock(&tw_death_lock); /* Unlink it, if it was scheduled */ - if (tw_del_dead_node(tw)) + if (inet_twsk_del_dead_node(tw)) tcp_tw_count--; else atomic_inc(&tw->tw_refcnt); @@ -644,13 +648,13 @@ void tcp_twcal_tick(unsigned long dummy) for (n=0; nrcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = tcp_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93a66b9a76e..af8ad5bb273 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -308,33 +308,32 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct inet_ehash_bucket *head; struct sock *sk; - struct hlist_node *node; - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); - int hash; - + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - head = &tcp_hashinfo.ehash[hash]; + const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - /* FIXME: acme: check this... */ - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } } @@ -455,43 +454,46 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } static int __tcp_v6_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr = &np->rcv_saddr; struct in6_addr *saddr = &np->daddr; int dif = sk->sk_bound_dev_if; - u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket*)sk2; + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); + + tw = inet_twsk(sk2); if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); - if (tw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { + if (tcptw->tw_ts_recent_stamp && + (!twp || + (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { /* See comment in tcp_ipv4.c */ - tp->write_seq = tw->tw_snd_nxt + 65535 + 2; + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (!tp->write_seq) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -502,7 +504,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) goto not_unique; } @@ -521,7 +523,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -556,7 +558,7 @@ static int tcp_v6_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + tcpv6_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -609,7 +611,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -845,7 +847,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1223,12 +1225,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1261,7 +1265,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket*)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1798,26 +1802,26 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } - switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2; sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -2137,17 +2141,18 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) } static void get_timewait6_sock(struct seq_file *seq, - struct tcp_tw_bucket *tw, int i) + struct inet_timewait_sock *tw, int i) { struct in6_addr *dest, *src; __u16 destp, srcp; + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tw->tw_v6_daddr; - src = &tw->tw_v6_rcv_saddr; + dest = &tcp6tw->tw_v6_daddr; + src = &tcp6tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -2244,6 +2249,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .rsk_prot = &tcp6_request_sock_ops, }; -- cgit v1.2.3-70-g09d2 From 463c84b97f24010a67cd871746d6a7e4c925a5f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:42 -0700 Subject: [NET]: Introduce inet_connection_sock This creates struct inet_connection_sock, moving members out of struct tcp_sock that are shareable with other INET connection oriented protocols, such as DCCP, that in my private tree already uses most of these members. The functions that operate on these members were renamed, using a inet_csk_ prefix while not being moved yet to a new file, so as to ease the review of these changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 - include/linux/ipv6.h | 8 +- include/linux/tcp.h | 39 +----- include/net/inet_connection_sock.h | 86 ++++++++++++ include/net/inet_hashtables.h | 6 +- include/net/request_sock.h | 6 +- include/net/sock.h | 3 - include/net/tcp.h | 222 +++++++++++++++---------------- include/net/tcp_ecn.h | 2 +- net/ipv4/inet_hashtables.c | 15 +-- net/ipv4/inet_timewait_sock.c | 5 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 90 +++++++------ net/ipv4/tcp_diag.c | 21 +-- net/ipv4/tcp_input.c | 266 +++++++++++++++++++------------------ net/ipv4/tcp_ipv4.c | 158 ++++++++++++---------- net/ipv4/tcp_minisocks.c | 28 ++-- net/ipv4/tcp_output.c | 86 ++++++------ net/ipv4/tcp_timer.c | 165 ++++++++++++----------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 54 ++++---- 21 files changed, 685 insertions(+), 581 deletions(-) create mode 100644 include/net/inet_connection_sock.h (limited to 'net/ipv6') diff --git a/include/linux/ip.h b/include/linux/ip.h index 2c54bbd3da7..33e8a19a1a0 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,7 +128,6 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } -struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -158,7 +157,6 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ - struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 98fa32316e4..88591913c94 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,15 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int tcp_twsk_ipv6only(const struct sock *sk) +static inline int inet_twsk_ipv6only(const struct sock *sk) { return inet_twsk(sk)->tw_ipv6only; } -static inline int tcp_v6_ipv6only(const struct sock *sk) +static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); } #else #define __ipv6_only_sock(sk) 0 @@ -360,7 +360,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define __tcp_v6_rcv_saddr(__sk) NULL #define tcp_v6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 -#define tcp_v6_ipv6only(__sk) 0 +#define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5d295b1b3de..800930fac38 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -177,8 +177,8 @@ struct tcp_info #include #include -#include #include +#include #include /* This defines a selective acknowledgement block. */ @@ -219,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) } struct tcp_sock { - /* inet_sock has to be the first member of tcp_sock */ - struct inet_sock inet; + /* inet_connection_sock has to be the first member of tcp_sock */ + struct inet_connection_sock inet_conn; int tcp_header_len; /* Bytes of tcp header to send */ /* @@ -241,18 +241,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - /* Delayed ACK control data */ - struct { - __u8 pending; /* ACK is pending */ - __u8 quick; /* Scheduled number of quick acks */ - __u8 pingpong; /* The session is interactive */ - __u8 blocked; /* Delayed ACK was blocked by socket lock*/ - __u32 ato; /* Predicted tick of soft clock */ - unsigned long timeout; /* Currently scheduled timeout */ - __u32 lrcvtime; /* timestamp of last received data packet*/ - __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ - } ack; /* Data for direct copy to user */ struct { @@ -271,8 +259,8 @@ struct tcp_sock { __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ - __u8 retransmits; /* Number of unrecovered RTO timeouts. */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ @@ -281,7 +269,7 @@ struct tcp_sock { __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 unused; + __u8 nonagle; /* Disable Nagle algorithm? */ __u8 defer_accept; /* User waits for some data after accept() */ /* RTT measurement */ @@ -290,19 +278,13 @@ struct tcp_sock { __u32 mdev_max; /* maximal mdev for the last rtt period */ __u32 rttvar; /* smoothed mdev_max */ __u32 rtt_seq; /* sequence number to update rttvar */ - __u32 rto; /* retransmit timeout */ __u32 packets_out; /* Packets which are "in flight" */ __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - __u8 backoff; /* backoff */ /* * Options received (usually on last packet, some only on SYN packets). */ - __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - - __u8 probes_out; /* unanswered 0 window probes */ struct tcp_options_received rx_opt; /* @@ -315,11 +297,6 @@ struct tcp_sock { __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; - /* Two commonly used timers in both sender and receiver paths. */ - unsigned long timeout; - struct timer_list retransmit_timer; /* Resend (no ack) */ - struct timer_list delack_timer; /* Ack delay */ - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ @@ -334,7 +311,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 syn_retries; /* num of allowed syn retries */ + __u8 probes_out; /* unanswered 0 window probes */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ @@ -349,14 +326,12 @@ struct tcp_sock { int undo_retrans; /* number of undoable retransmissions. */ __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ - __u8 pending; /* Scheduled timer event */ __u8 urg_mode; /* In urgent mode */ + /* ONE BYTE HOLE, TRY TO PACK! */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ - struct request_sock_queue accept_queue; /* FIFO of established children */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 00000000000..ef609396e41 --- /dev/null +++ b/include/net/inet_connection_sock.h @@ -0,0 +1,86 @@ +/* + * NET Generic infrastructure for INET connection oriented protocols. + * + * Definitions for inet_connection_sock + * + * Authors: Many people, see the TCP sources + * + * From code originally in TCP + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_CONNECTION_SOCK_H +#define _INET_CONNECTION_SOCK_H + +#include +#include +#include + +struct inet_bind_bucket; +struct inet_hashinfo; + +/** inet_connection_sock - INET connection oriented sock + * + * @icsk_accept_queue: FIFO of established children + * @icsk_bind_hash: Bind node + * @icsk_timeout: Timeout + * @icsk_retransmit_timer: Resend (no ack) + * @icsk_rto: Retransmit timeout + * @icsk_retransmits: Number of unrecovered [RTO] timeouts + * @icsk_pending: Scheduled timer event + * @icsk_backoff: Backoff + * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_ack: Delayed ACK control data + */ +struct inet_connection_sock { + /* inet_sock has to be the first member! */ + struct inet_sock icsk_inet; + struct request_sock_queue icsk_accept_queue; + struct inet_bind_bucket *icsk_bind_hash; + unsigned long icsk_timeout; + struct timer_list icsk_retransmit_timer; + struct timer_list icsk_delack_timer; + __u32 icsk_rto; + __u8 icsk_retransmits; + __u8 icsk_pending; + __u8 icsk_backoff; + __u8 icsk_syn_retries; + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } icsk_ack; +}; + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +extern void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)); +extern void inet_csk_clear_xmit_timers(struct sock *sk); + +extern struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const __u32 raddr, + const __u32 laddr); +extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum); + +extern struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req); + +#endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b5c0d64ea74..f0c21c07f89 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include #include #include @@ -185,9 +185,9 @@ static inline void __inet_inherit_port(struct inet_hashinfo *table, struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; + inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 334717bf9ef..b7c7eecbe64 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -224,17 +224,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) return prev_qlen; } -static inline int reqsk_queue_len(struct request_sock_queue *queue) +static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; } -static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return queue->listen_opt->qlen_young; } -static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) { return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; } diff --git a/include/net/sock.h b/include/net/sock.h index 828dc082fcb..48cc337a656 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,9 +493,6 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; -/* Here is the right place to enable sock refcounting debugging */ -//#define SOCK_REFCNT_DEBUG - /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto diff --git a/include/net/tcp.h b/include/net/tcp.h index cf8e664176a..a943c79c88b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,10 +19,11 @@ #define _TCP_H #define TCP_DEBUG 1 +#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 /* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMERS +#undef INET_CSK_CLEAR_TIMERS #include #include @@ -205,10 +206,10 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define TCP_TIME_RETRANS 1 /* Retransmit timer */ -#define TCP_TIME_DACK 2 /* Delayed ack timer */ -#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ -#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -257,9 +258,9 @@ extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define TCP_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) 1 #endif /* @@ -372,41 +373,42 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum tcp_ack_state_t -{ - TCP_ACK_SCHED = 1, - TCP_ACK_TIMER = 2, - TCP_ACK_PUSHED= 4 +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 }; -static inline void tcp_schedule_ack(struct tcp_sock *tp) +static inline void inet_csk_schedule_ack(struct sock *sk) { - tp->ack.pending |= TCP_ACK_SCHED; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; } -static inline int tcp_ack_scheduled(struct tcp_sock *tp) +static inline int inet_csk_ack_scheduled(const struct sock *sk) { - return tp->ack.pending&TCP_ACK_SCHED; + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; } -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk, + const unsigned int pkts) { - if (tp->ack.quick) { - if (pkts >= tp->ack.quick) { - tp->ack.quick = 0; + struct inet_connection_sock *icsk = inet_csk(sk); + if (icsk->icsk_ack.quick) { + if (pkts >= icsk->icsk_ack.quick) { + icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.ato = TCP_ATO_MIN; } else - tp->ack.quick -= pkts; + icsk->icsk_ack.quick -= pkts; } } -extern void tcp_enter_quickack_mode(struct tcp_sock *tp); +extern void tcp_enter_quickack_mode(struct sock *sk); -static __inline__ void tcp_delack_init(struct tcp_sock *tp) +static inline void inet_csk_delack_init(struct sock *sk) { - memset(&tp->ack, 0, sizeof(tp->ack)); + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -440,7 +442,7 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); +extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -534,15 +536,18 @@ extern void tcp_cwnd_application_limited(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -extern void tcp_clear_xmit_timers(struct sock *); +static inline void tcp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} -extern void tcp_delete_keepalive_timer(struct sock *); -extern void tcp_reset_keepalive_timer(struct sock *, unsigned long); +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef TCP_DEBUG -extern const char tcp_timer_bug_msg[]; +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; #endif /* tcp_diag.c */ @@ -554,70 +559,58 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void tcp_clear_xmit_timer(struct sock *sk, int what) +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->retransmit_timer); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif - break; - case TCP_TIME_DACK: - tp->ack.blocked = 0; - tp->ack.pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->delack_timer); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif - break; - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; - } /* * Reset the retransmission timer */ -static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (when > TCP_RTO_MAX) { -#ifdef TCP_DEBUG - printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); #endif when = TCP_RTO_MAX; } - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = what; - tp->timeout = jiffies+when; - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); - break; - - case TCP_TIME_DACK: - tp->ack.pending |= TCP_ACK_TIMER; - tp->ack.timeout = jiffies+when; - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); - break; - - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; } /* Initialize RCV_MSS value. @@ -637,7 +630,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); - tp->ack.rcv_mss = hint; + inet_csk(sk)->icsk_ack.rcv_mss = hint; } static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -772,7 +765,7 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -939,8 +932,9 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tp->packets_out && !tp->pending) - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); + const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1021,8 +1015,9 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); - if (!tcp_ack_scheduled(tp)) - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4); } return 1; } @@ -1055,7 +1050,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (inet_sk(sk)->bind_hash && + if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(&tcp_hashinfo, sk); /* fall through */ @@ -1186,51 +1181,55 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, - struct sock *child) +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { - reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); } -static inline void -tcp_synq_removed(struct sock *sk, struct request_sock *req) +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) { - if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) - tcp_delete_keepalive_timer(sk); + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); } -static inline void tcp_synq_added(struct sock *sk) +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) { - if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) - tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); } -static inline int tcp_synq_len(struct sock *sk) +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) { - return reqsk_queue_len(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_young(struct sock *sk) +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) { - return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_is_full(struct sock *sk) +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { - return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) { - reqsk_queue_unlink(&tp->accept_queue, req, prev); + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); } -static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) { - tcp_synq_unlink(tcp_sk(sk), req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); reqsk_free(req); } @@ -1265,12 +1264,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } -static inline int tcp_fin_time(const struct tcp_sock *tp) +static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + const int rto = inet_csk(sk)->icsk_rto; - if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) - fin_timeout = (tp->rto<<2) - (tp->rto>>1); + if (fin_timeout < (rto << 2) - (rto >> 1)) + fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92..c6b84397448 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) * it is surely retransmit. It is not in ECN RFC, * but Linux follows this rule. */ else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode((struct sock *)tp); } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d94e962958a..e8d29fe736d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,6 +19,7 @@ #include #include +#include #include /* @@ -56,10 +57,9 @@ void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - struct inet_sock *inet = inet_sk(sk); - inet->num = snum; + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); - inet->bind_hash = tb; + inet_csk(sk)->icsk_bind_hash = tb; } EXPORT_SYMBOL(inet_bind_hash); @@ -69,16 +69,15 @@ EXPORT_SYMBOL(inet_bind_hash); */ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); - const int bhash = inet_bhashfn(inet->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); - inet->bind_hash = NULL; - inet->num = 0; + inet_csk(sk)->icsk_bind_hash = NULL; + inet_sk(sk)->num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ceb577c7423..5cba59b869f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -56,6 +56,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. @@ -64,8 +65,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = inet->bind_hash; - BUG_TRAP(inet->bind_hash); + tw->tw_tb = icsk->icsk_bind_hash; + BUG_TRAP(icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 72d01444218..8692cb9d4bd 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -180,7 +180,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); if (child) - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); else reqsk_free(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1a708bf7a9..8177b86570d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -313,7 +313,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) { - return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0; } /* @@ -458,15 +458,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); + struct inet_connection_sock *icsk = inet_csk(sk); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE); if (rc != 0) return rc; sk->sk_max_ack_backlog = 0; sk->sk_ack_backlog = 0; - tcp_delack_init(tp); + inet_csk_delack_init(sk); /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). @@ -484,7 +484,7 @@ int tcp_listen_start(struct sock *sk) } sk->sk_state = TCP_CLOSE; - __reqsk_queue_destroy(&tp->accept_queue); + __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } @@ -495,14 +495,14 @@ int tcp_listen_start(struct sock *sk) static void tcp_listen_stop (struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; struct request_sock *req; - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); + acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -512,7 +512,7 @@ static void tcp_listen_stop (struct sock *sk) * To be honest, we are not able to make either * of the variants now. --ANK */ - reqsk_queue_destroy(&tp->accept_queue); + reqsk_queue_destroy(&icsk->icsk_accept_queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -1039,20 +1039,21 @@ static void cleanup_rbuf(struct sock *sk, int copied) BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif - if (tcp_ack_scheduled(tp)) { + if (inet_csk_ack_scheduled(sk)) { + const struct inet_connection_sock *icsk = inet_csk(sk); /* Delayed ACKs frequently hit locked sockets during bulk * receive. */ - if (tp->ack.blocked || + if (icsk->icsk_ack.blocked || /* Once-per-two-segments ACK was not sent by tcp_input.c */ - tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss || + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || /* * If this read emptied read buffer, we send ACK, if * connection is not bidirectional, user drained * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) && - !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -1569,7 +1570,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -1698,10 +1699,10 @@ adjudge_to_death: tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); } else { - int tmo = tcp_fin_time(tp); + const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); + inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); } else { atomic_inc(&tcp_orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -1746,6 +1747,7 @@ static inline int tcp_need_reset(int state) int tcp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err = 0; int old_state = sk->sk_state; @@ -1782,7 +1784,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->srtt = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; - tp->backoff = 0; + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; tp->packets_out = 0; @@ -1790,13 +1792,13 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); tcp_clear_retrans(tp); - tcp_delack_init(tp); + inet_csk_delack_init(sk); sk->sk_send_head = NULL; tp->rx_opt.saw_tstamp = 0; tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || inet->bind_hash); + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -1808,7 +1810,7 @@ int tcp_disconnect(struct sock *sk, int flags) */ static int wait_for_connect(struct sock *sk, long timeo) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); DEFINE_WAIT(wait); int err; @@ -1830,11 +1832,11 @@ static int wait_for_connect(struct sock *sk, long timeo) prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); release_sock(sk); - if (reqsk_queue_empty(&tp->accept_queue)) + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); lock_sock(sk); err = 0; - if (!reqsk_queue_empty(&tp->accept_queue)) + if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) break; err = -EINVAL; if (sk->sk_state != TCP_LISTEN) @@ -1854,9 +1856,9 @@ static int wait_for_connect(struct sock *sk, long timeo) * This will accept the next outstanding connection. */ -struct sock *tcp_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *newsk; int error; @@ -1870,7 +1872,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&tp->accept_queue)) { + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -1883,7 +1885,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; } - newsk = reqsk_queue_get_child(&tp->accept_queue, sk); + newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); out: release_sock(sk); @@ -1901,6 +1903,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int val; int err = 0; @@ -1999,7 +2002,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - tcp_reset_keepalive_timer(sk, elapsed); + inet_csk_reset_keepalive_timer(sk, elapsed); } } break; @@ -2019,7 +2022,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else - tp->syn_retries = val; + icsk->icsk_syn_retries = val; break; case TCP_LINGER2: @@ -2058,16 +2061,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, case TCP_QUICKACK: if (!val) { - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } else { - tp->ack.pingpong = 0; + icsk->icsk_ack.pingpong = 0; if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - tcp_ack_scheduled(tp)) { - tp->ack.pending |= TCP_ACK_PUSHED; + inet_csk_ack_scheduled(sk)) { + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; cleanup_rbuf(sk, 1); if (!(val & 1)) - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } } break; @@ -2084,15 +2087,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, void tcp_get_info(struct sock *sk, struct tcp_info *info) { struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; info->tcpi_ca_state = tp->ca_state; - info->tcpi_retransmits = tp->retransmits; + info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = tp->probes_out; - info->tcpi_backoff = tp->backoff; + info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; @@ -2107,10 +2111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->ecn_flags&TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; - info->tcpi_rto = jiffies_to_usecs(tp->rto); - info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); + info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); + info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); info->tcpi_snd_mss = tp->mss_cache; - info->tcpi_rcv_mss = tp->ack.rcv_mss; + info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; info->tcpi_unacked = tp->packets_out; info->tcpi_sacked = tp->sacked_out; @@ -2119,7 +2123,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); - info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); + info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = tp->pmtu_cookie; @@ -2179,7 +2183,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: - val = tp->syn_retries ? : sysctl_tcp_syn_retries; + val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; @@ -2209,7 +2213,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return 0; } case TCP_QUICKACK: - val = !tp->ack.pingpong; + val = !inet_csk(sk)->icsk_ack.pingpong; break; case TCP_CONGESTION: @@ -2340,7 +2344,7 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); } -EXPORT_SYMBOL(tcp_accept); +EXPORT_SYMBOL(inet_csk_accept); EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 60c6a797cc5..5f4c74f45e8 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -48,8 +48,9 @@ static struct sock *tcpnl; static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; struct tcp_info *info = NULL; @@ -129,14 +130,14 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->tcpdiag_timer = 1; - r->tcpdiag_retrans = tp->retransmits; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); - } else if (tp->pending == TCP_TIME_PROBE0) { + r->tcpdiag_retrans = icsk->icsk_retransmits; + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; r->tcpdiag_retrans = tp->probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; r->tcpdiag_retrans = tp->probes_out; @@ -497,7 +498,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, { struct tcpdiag_entry entry; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; struct rtattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); @@ -513,9 +514,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&tp->accept_queue.syn_wait_lock); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - lopt = tp->accept_queue.listen_opt; + lopt = icsk->icsk_accept_queue.listen_opt; if (!lopt || !lopt->qlen) goto out; @@ -572,7 +573,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ffa24025cd0..8a8c5c2d90c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, - struct sk_buff *skb) +static inline void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { - unsigned int len, lss; + struct inet_connection_sock *icsk = inet_csk(sk); + const unsigned int lss = icsk->icsk_ack.last_seg_size; + unsigned int len; - lss = tp->ack.last_seg_size; - tp->ack.last_seg_size = 0; + icsk->icsk_ack.last_seg_size = 0; /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ len = skb->len; - if (len >= tp->ack.rcv_mss) { - tp->ack.rcv_mss = len; + if (len >= icsk->icsk_ack.rcv_mss) { + icsk->icsk_ack.rcv_mss = len; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. */ - len -= tp->tcp_header_len; - tp->ack.last_seg_size = len; + len -= tcp_sk(sk)->tcp_header_len; + icsk->icsk_ack.last_seg_size = len; if (len == lss) { - tp->ack.rcv_mss = len; + icsk->icsk_ack.rcv_mss = len; return; } } - tp->ack.pending |= TCP_ACK_PUSHED; + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } -static void tcp_incr_quickack(struct tcp_sock *tp) +static void tcp_incr_quickack(struct sock *sk) { - unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks==0) quickacks=2; - if (quickacks > tp->ack.quick) - tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); + if (quickacks > icsk->icsk_ack.quick) + icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -void tcp_enter_quickack_mode(struct tcp_sock *tp) +void tcp_enter_quickack_mode(struct sock *sk) { - tcp_incr_quickack(tp); - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + struct inet_connection_sock *icsk = inet_csk(sk); + tcp_incr_quickack(sk); + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. */ -static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp) +static inline int tcp_in_quickack_mode(const struct sock *sk) { - return (tp->ack.quick && !tp->ack.pingpong); + const struct inet_connection_sock *icsk = inet_csk(sk); + return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } /* Buffer size and advertised window tuning. @@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, + const struct sk_buff *skb) { /* Optimize this! */ int truesize = tcp_win_from_space(skb->truesize)/2; @@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) - return 2*tp->ack.rcv_mss; + return 2 * inet_csk(sk)->icsk_ack.rcv_mss; truesize >>= 1; window >>= 1; @@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, if (incr) { tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); - tp->ack.quick |= 1; + inet_csk(sk)->icsk_ack.quick |= 1; } } } @@ -325,7 +329,7 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - tp->ack.quick = 0; + inet_csk(sk)->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -346,8 +350,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > tp->ack.rcv_mss) - app_win -= tp->ack.rcv_mss; + if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) + app_win -= inet_csk(sk)->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -415,11 +419,12 @@ new_measure: tp->rcv_rtt_est.time = tcp_time_stamp; } -static inline void tcp_rcv_rtt_measure_ts(struct tcp_sock *tp, struct sk_buff *skb) +static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); } @@ -492,41 +497,42 @@ new_measure: */ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); u32 now; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); - tcp_measure_rcv_mss(tp, skb); + tcp_measure_rcv_mss(sk, skb); tcp_rcv_rtt_measure(tp); now = tcp_time_stamp; - if (!tp->ack.ato) { + if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize * delayed ACK engine. */ - tcp_incr_quickack(tp); - tp->ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + icsk->icsk_ack.ato = TCP_ATO_MIN; } else { - int m = now - tp->ack.lrcvtime; + int m = now - icsk->icsk_ack.lrcvtime; if (m <= TCP_ATO_MIN/2) { /* The fastest case is the first. */ - tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; - } else if (m < tp->ack.ato) { - tp->ack.ato = (tp->ack.ato>>1) + m; - if (tp->ack.ato > tp->rto) - tp->ack.ato = tp->rto; - } else if (m > tp->rto) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; + } else if (m < icsk->icsk_ack.ato) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m; + if (icsk->icsk_ack.ato > icsk->icsk_rto) + icsk->icsk_ack.ato = icsk->icsk_rto; + } else if (m > icsk->icsk_rto) { /* Too long gap. Apparently sender falled to * restart window, so that we send ACKs quickly. */ - tcp_incr_quickack(tp); + tcp_incr_quickack(sk); sk_stream_mem_reclaim(sk); } } - tp->ack.lrcvtime = now; + icsk->icsk_ack.lrcvtime = now; TCP_ECN_check_ce(tp, skb); @@ -611,8 +617,9 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static inline void tcp_set_rto(struct tcp_sock *tp) +static inline void tcp_set_rto(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) * * More seriously: @@ -623,7 +630,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some curcumstances. */ - tp->rto = (tp->srtt >> 3) + tp->rttvar; + inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -635,10 +642,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ -static inline void tcp_bound_rto(struct tcp_sock *tp) +static inline void tcp_bound_rto(struct sock *sk) { - if (tp->rto > TCP_RTO_MAX) - tp->rto = TCP_RTO_MAX; + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } /* Save metrics learned by this TCP session. @@ -658,7 +665,7 @@ void tcp_update_metrics(struct sock *sk) if (dst && (dst->flags&DST_HOST)) { int m; - if (tp->backoff || !tp->srtt) { + if (inet_csk(sk)->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. * Reset our results. @@ -801,9 +808,9 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev = dst_metric(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); } - tcp_set_rto(tp); - tcp_bound_rto(tp); - if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) + tcp_set_rto(sk); + tcp_bound_rto(sk); + if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -817,7 +824,7 @@ reset: if (!tp->rx_opt.saw_tstamp && tp->srtt) { tp->srtt = 0; tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } } @@ -1118,7 +1125,7 @@ void tcp_enter_frto(struct sock *sk) if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_FRTO); @@ -1214,7 +1221,7 @@ void tcp_enter_loss(struct sock *sk, int how) /* Reduce ssthresh if it has not yet been made inside this window. */ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_LOSS); @@ -1253,7 +1260,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_ECN_queue_cwr(tp); } -static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) +static int tcp_check_sack_reneging(struct sock *sk) { struct sk_buff *skb; @@ -1268,9 +1275,10 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - tp->retransmits++; + inet_csk(sk)->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); return 1; } return 0; @@ -1281,15 +1289,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; } -static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) { - return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); + return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); } static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) { return tp->packets_out && - tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); + tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); } /* Linux NewReno/SACK/FACK/ECN state machine. @@ -1509,7 +1517,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { - if (tcp_skb_timedout(tp, skb) && + if (tcp_skb_timedout(sk, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -1676,7 +1684,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) tp->left_out = tp->sacked_out; tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) tcp_set_ca_state(tp, TCP_CA_Open); @@ -1750,7 +1758,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + if (tp->sacked_out && tcp_check_sack_reneging(sk)) return; /* C. Process data loss notification, provided it is valid. */ @@ -1774,7 +1782,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { case TCP_CA_Loss: - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1824,7 +1832,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); @@ -1881,10 +1889,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Superceeds RFC1323) */ -static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) +static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) { - __u32 seq_rtt; - /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment * acknowledges some new data, i.e., only if it advances the @@ -1900,14 +1906,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) * answer arrives rto becomes 120 seconds! If at least one of segments * in window is lost... Voila. --ANK (010210) */ - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + struct tcp_sock *tp = tcp_sk(sk); + const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag) +static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) { /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine @@ -1921,20 +1928,21 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static inline void tcp_ack_update_rtt(struct tcp_sock *tp, - int flag, s32 seq_rtt, u32 *usrtt) +static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, + const s32 seq_rtt, u32 *usrtt) { + const struct tcp_sock *tp = tcp_sk(sk); /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tcp_ack_saw_tstamp(tp, usrtt, flag); + tcp_ack_saw_tstamp(sk, usrtt, flag); else if (seq_rtt >= 0) - tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag); + tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, @@ -1951,9 +1959,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { - tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } } @@ -2090,7 +2098,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { - tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt); + tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); if (tp->ca_ops->pkts_acked) @@ -2125,20 +2133,21 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt static void tcp_ack_probe(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); /* Was it a usable window open? */ if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, tp->snd_una + tp->snd_wnd)) { - tp->backoff = 0; - tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); + icsk->icsk_backoff = 0; + inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! */ } else { - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } } @@ -2157,8 +2166,8 @@ static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ -static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack, - u32 ack_seq, u32 nwin) +static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, + const u32 ack_seq, const u32 nwin) { return (after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || @@ -2500,8 +2509,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * up to bandwidth of 18Gigabit/sec. 8) ] */ -static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) +static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th = skb->h.th; u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -2516,14 +2526,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. */ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); } -static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { + const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(tp, skb)); + !tcp_disordered_ack(sk, skb)); } /* Check segment sequence number for validity. @@ -2586,7 +2597,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); @@ -2596,7 +2607,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - tp->ack.pingpong = 1; + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: @@ -2694,7 +2705,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -2942,7 +2953,7 @@ queue_and_out: * gap in queue is filled. */ if (skb_queue_empty(&tp->out_of_order_queue)) - tp->ack.pingpong = 0; + inet_csk(sk)->icsk_ack.pingpong = 0; } if (tp->rx_opt.num_sacks) @@ -2963,8 +2974,8 @@ queue_and_out: tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: - tcp_enter_quickack_mode(tp); - tcp_schedule_ack(tp); + tcp_enter_quickack_mode(sk); + inet_csk_schedule_ack(sk); drop: __kfree_skb(skb); return; @@ -2974,7 +2985,7 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ @@ -3003,7 +3014,7 @@ drop: /* Disable header prediction. */ tp->pred_flags = 0; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -3373,13 +3384,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) struct tcp_sock *tp = tcp_sk(sk); /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss + if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). Or... */ && __tcp_select_window(sk) >= tp->rcv_wnd) || /* We ACK each frame or... */ - tcp_in_quickack_mode(tp) || + tcp_in_quickack_mode(sk) || /* We have out of order data. */ (ofo_possible && skb_peek(&tp->out_of_order_queue))) { @@ -3393,8 +3404,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) static __inline__ void tcp_ack_snd_check(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_ack_scheduled(tp)) { + if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ return; } @@ -3648,7 +3658,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* We know that such packets are checksummed * on entry. @@ -3681,7 +3691,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -3702,7 +3712,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; @@ -3722,7 +3732,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Well, only one small jumplet in fast path... */ tcp_ack(sk, skb, FLAG_DATA); tcp_data_snd_check(sk, tp); - if (!tcp_ack_scheduled(tp)) + if (!inet_csk_ack_scheduled(sk)) goto no_ack; } @@ -3744,7 +3754,7 @@ slow_path: * RFC1323: H1. Apply PAWS check first. */ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -3791,7 +3801,7 @@ step5: if(th->ack) tcp_ack(sk, skb, FLAG_SLOWPATH); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ tcp_urg(sk, skb, th); @@ -3933,7 +3943,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_buffer_space(sk); if (sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -3945,7 +3955,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) { + if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * @@ -3953,12 +3963,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * look so _wonderfully_ clever, that I was not able * to stand against the temptation 8) --ANK */ - tcp_schedule_ack(tp); - tp->ack.lrcvtime = tcp_time_stamp; - tp->ack.ato = TCP_ATO_MIN; - tcp_incr_quickack(tp); - tcp_enter_quickack_mode(tp); - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + tcp_enter_quickack_mode(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); discard: __kfree_skb(skb); @@ -4114,7 +4124,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -4183,7 +4193,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(tp, 0, 0); + tcp_ack_saw_tstamp(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -4230,9 +4240,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - tmo = tcp_fin_time(tp); + tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -4240,7 +4250,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * if it spins in bh_lock_sock(), but it is really * marginal case. */ - tcp_reset_keepalive_timer(sk, tmo); + inet_csk_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto discard; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e7e91e60ac7..2cd41265d17 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,7 +104,7 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -113,7 +113,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && - !tcp_v6_ipv6only(sk2) && + !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { @@ -132,7 +132,8 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. */ -static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -146,16 +147,16 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_hashinfo.portalloc_lock); - if (tcp_hashinfo.port_rover < low) + spin_lock(&hashinfo->portalloc_lock); + if (hashinfo->port_rover < low) rover = low; else - rover = tcp_hashinfo.port_rover; + rover = hashinfo->port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -164,8 +165,8 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_hashinfo.port_rover = rover; - spin_unlock(&tcp_hashinfo.portalloc_lock); + hashinfo->port_rover = rover; + spin_unlock(&hashinfo->portalloc_lock); /* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash @@ -182,7 +183,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -199,13 +200,13 @@ tb_found: goto success; } else { ret = 1; - if (tcp_bind_conflict(sk, tb)) + if (inet_csk_bind_conflict(sk, tb)) goto fail_unlock; } } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -216,9 +217,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -228,6 +229,11 @@ fail: return ret; } +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return inet_csk_get_port(&tcp_hashinfo, sk, snum); +} + static void tcp_v4_hash(struct sock *sk) { inet_hash(&tcp_hashinfo, sk); @@ -426,7 +432,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __inet_hash(&tcp_hashinfo, sk, 0); @@ -557,25 +563,28 @@ failure: return err; } -static __inline__ int tcp_v4_iif(struct sk_buff *skb) +static inline int inet_iif(const struct sk_buff *skb) { return ((struct rtable *)skb->dst)->rt_iif; } -static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) { - return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); + return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); } -static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, - struct request_sock ***prevp, - __u16 rport, - __u32 raddr, __u32 laddr) +struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, const __u32 raddr, + const __u32 laddr) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; + for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -583,7 +592,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, if (ireq->rmt_port == rport && ireq->rmt_addr == raddr && ireq->loc_addr == laddr && - TCP_INET_FAMILY(req->rsk_ops->family)) { + AF_INET_FAMILY(req->rsk_ops->family)) { BUG_TRAP(!req->sk); *prevp = prev; break; @@ -595,12 +604,13 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); - reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -687,7 +697,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, tcp_v4_iif(skb)); + th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -747,8 +757,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) if (sock_owned_by_user(sk)) goto out; - req = tcp_v4_search_req(tp, &prev, th->dest, - iph->daddr, iph->saddr); + req = inet_csk_search_req(sk, &prev, th->dest, + iph->daddr, iph->saddr); if (!req) goto out; @@ -768,7 +778,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -953,8 +963,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) req->ts_recent); } -static struct dst_entry* tcp_v4_route_req(struct sock *sk, - struct request_sock *req) +struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -966,7 +976,7 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk, ireq->rmt_addr), .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, + .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; @@ -996,7 +1006,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto out; skb = tcp_make_synack(sk, dst, req); @@ -1098,7 +1108,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1112,7 +1122,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. */ - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp_request_sock_ops); @@ -1169,7 +1179,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && sysctl_tcp_tw_recycle && - (dst = tcp_v4_route_req(sk, req)) != NULL && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && @@ -1182,7 +1192,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } /* Kill the following clause, if you dislike this way. */ else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - tcp_synq_len(sk) < + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && (!dst || !dst_metric(dst, RTAX_RTT))) { @@ -1240,7 +1250,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto exit; newsk = tcp_create_openreq_child(sk, req, skb); @@ -1257,7 +1267,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; - newinet->mc_index = tcp_v4_iif(skb); + newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; newtp->ext_header_len = 0; if (newinet->opt) @@ -1285,18 +1295,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th; struct iphdr *iph = skb->nh.iph; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ - struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, - iph->saddr, iph->daddr); + struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, + iph->saddr, iph->daddr); if (req) return tcp_check_req(sk, skb, req, prev); nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, - ntohs(th->dest), tcp_v4_iif(skb)); + ntohs(th->dest), inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1440,7 +1449,7 @@ int tcp_v4_rcv(struct sk_buff *skb) sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1507,7 +1516,7 @@ do_time_wait: struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (sk2) { tcp_tw_deschedule((struct inet_timewait_sock *)sk); inet_twsk_put((struct inet_timewait_sock *)sk); @@ -1619,7 +1628,7 @@ static int tcp_v4_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1672,7 +1681,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. */ - if (inet_sk(sk)->bind_hash) + if (inet_csk(sk)->icsk_bind_hash) inet_put_port(&tcp_hashinfo, sk); /* @@ -1707,7 +1716,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) static void *listening_get_next(struct seq_file *seq, void *cur) { - struct tcp_sock *tp; + struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; struct tcp_iter_state* st = seq->private; @@ -1723,7 +1732,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; - tp = tcp_sk(st->syn_wait_sk); + icsk = inet_csk(st->syn_wait_sk); req = req->dl_next; while (1) { while (req) { @@ -1736,17 +1745,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (++st->sbucket >= TCP_SYNQ_HSIZE) break; get_req: - req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; + req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } sk = sk_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_next(sk); } get_sk: @@ -1755,9 +1764,9 @@ get_sk: cur = sk; goto out; } - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) { + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); st->syn_wait_sk = sk; @@ -1765,7 +1774,7 @@ start_req: st->sbucket = 0; goto get_req; } - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); @@ -1951,8 +1960,8 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_OPENREQ: if (v) { - struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) @@ -2058,18 +2067,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) int timer_active; unsigned long timer_expires; struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2084,12 +2094,14 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } @@ -2174,7 +2186,7 @@ struct proto tcp_prot = { .close = tcp_close, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, .destroy = tcp_v4_destroy_sock, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8b6cd8d8066..56823704eb7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -271,7 +271,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const int rto = (tp->rto << 2) - (tp->rto >> 1); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; @@ -605,10 +606,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_sock *newinet = inet_sk(newsk); + struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp; newsk->sk_state = TCP_SYN_RECV; - newinet->bind_hash = NULL; + newicsk->icsk_bind_hash = NULL; /* Clone the TCP header template */ newinet->dport = ireq->rmt_port; @@ -624,11 +626,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); - newtp->retransmits = 0; - newtp->backoff = 0; + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; newtp->srtt = 0; newtp->mdev = TCP_TIMEOUT_INIT; - newtp->rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; newtp->left_out = 0; @@ -667,11 +669,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; /* Deinitialize accept_queue to trap illegal accesses. */ - memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); + memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); if (sock_flag(newsk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + inet_csk_reset_keepalive_timer(newsk, + keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { @@ -701,7 +703,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->tcp_header_len = sizeof(struct tcphdr); } if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) - newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; + newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); if (newtp->ecn_flags&TCP_ECN_OK) @@ -881,10 +883,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (child == NULL) goto listen_overflow; - tcp_synq_unlink(tp, req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: @@ -898,7 +900,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_reset(skb); - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a4d1eb9a092..6f0a7e30cea 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -105,8 +105,9 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) { + struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; @@ -116,7 +117,7 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) tp->snd_ssthresh = tcp_current_ssthresh(tp); restart_cwnd = min(restart_cwnd, cwnd); - while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) + while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) static inline void tcp_event_data_sent(struct tcp_sock *tp, struct sk_buff *skb, struct sock *sk) { - u32 now = tcp_time_stamp; + struct inet_connection_sock *icsk = inet_csk(sk); + const u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) - tcp_cwnd_restart(tp, __sk_dst_get(sk)); + if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) + tcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) - tp->ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + icsk->icsk_ack.pingpong = 1; } static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { - struct tcp_sock *tp = tcp_sk(sk); - - tcp_dec_quickack_mode(tp, pkts); - tcp_clear_xmit_timer(sk, TCP_TIME_DACK); + tcp_dec_quickack_mode(sk, pkts); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } /* Determine a window scaling and initial window to offer. @@ -696,7 +696,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) + if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) tcp_cwnd_application_limited(sk); } } @@ -1147,6 +1147,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) */ u32 __tcp_select_window(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* MSS for the peer's data. Previous verions used mss_clamp * here. I don't know if the value based on our guesses @@ -1154,7 +1155,7 @@ u32 __tcp_select_window(struct sock *sk) * but may be worse for the performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ - int mss = tp->ack.rcv_mss; + int mss = icsk->icsk_ack.rcv_mss; int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; @@ -1163,7 +1164,7 @@ u32 __tcp_select_window(struct sock *sk) mss = full_space; if (free_space < full_space/2) { - tp->ack.quick = 0; + icsk->icsk_ack.quick = 0; if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); @@ -1491,7 +1492,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); } packet_cnt -= tcp_skb_pcount(skb); @@ -1544,7 +1546,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } @@ -1780,8 +1782,8 @@ static inline void tcp_connect_init(struct sock *sk) tp->rcv_wup = 0; tp->copied_seq = 0; - tp->rto = TCP_TIMEOUT_INIT; - tp->retransmits = 0; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } @@ -1824,7 +1826,7 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); return 0; } @@ -1834,20 +1836,21 @@ int tcp_connect(struct sock *sk) */ void tcp_send_delayed_ack(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - int ato = tp->ack.ato; + struct inet_connection_sock *icsk = inet_csk(sk); + int ato = icsk->icsk_ack.ato; unsigned long timeout; if (ato > TCP_DELACK_MIN) { + const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ/2; - if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) + if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ /* If some rtt estimate is known, use it to bound delayed ack. - * Do not use tp->rto here, use results of rtt measurements + * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ if (tp->srtt) { @@ -1864,21 +1867,22 @@ void tcp_send_delayed_ack(struct sock *sk) timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ - if (tp->ack.pending&TCP_ACK_TIMER) { + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { /* If delack timer was blocked or is about to expire, * send ACK now. */ - if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { + if (icsk->icsk_ack.blocked || + time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { tcp_send_ack(sk); return; } - if (!time_before(timeout, tp->ack.timeout)) - timeout = tp->ack.timeout; + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; } - tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; - tp->ack.timeout = timeout; - sk_reset_timer(sk, &tp->delack_timer, timeout); + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } /* This routine sends an ack and also updates the window. */ @@ -1895,9 +1899,9 @@ void tcp_send_ack(struct sock *sk) */ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (buff == NULL) { - tcp_schedule_ack(tp); - tp->ack.ato = TCP_ATO_MIN; - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); return; } @@ -2011,6 +2015,7 @@ int tcp_write_wakeup(struct sock *sk) */ void tcp_send_probe0(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err; @@ -2019,16 +2024,16 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ tp->probes_out = 0; - tp->backoff = 0; + icsk->icsk_backoff = 0; return; } if (err <= 0) { - if (tp->backoff < sysctl_tcp_retries2) - tp->backoff++; + if (icsk->icsk_backoff < sysctl_tcp_retries2) + icsk->icsk_backoff++; tp->probes_out++; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } else { /* If packet was not sent due to local congestion, * do not backoff and do not remember probes_out. @@ -2038,8 +2043,9 @@ void tcp_send_probe0(struct sock *sk) */ if (!tp->probes_out) tp->probes_out=1; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, + TCP_RESOURCE_PROBE_INTERVAL)); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0084227438c..0b71380ee42 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,9 +36,9 @@ static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); static void tcp_keepalive_timer (unsigned long data); -#ifdef TCP_DEBUG -const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; -EXPORT_SYMBOL(tcp_timer_bug_msg); +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* @@ -46,40 +46,45 @@ EXPORT_SYMBOL(tcp_timer_bug_msg); * We may wish use just one timer maintaining a list of expire jiffies * to optimize. */ - -void tcp_init_xmit_timers(struct sock *sk) +void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - init_timer(&tp->retransmit_timer); - tp->retransmit_timer.function=&tcp_write_timer; - tp->retransmit_timer.data = (unsigned long) sk; - tp->pending = 0; + init_timer(&icsk->icsk_retransmit_timer); + init_timer(&icsk->icsk_delack_timer); + init_timer(&sk->sk_timer); - init_timer(&tp->delack_timer); - tp->delack_timer.function=&tcp_delack_timer; - tp->delack_timer.data = (unsigned long) sk; - tp->ack.pending = 0; + icsk->icsk_retransmit_timer.function = retransmit_handler; + icsk->icsk_delack_timer.function = delack_handler; + sk->sk_timer.function = keepalive_handler; - init_timer(&sk->sk_timer); - sk->sk_timer.function = &tcp_keepalive_timer; - sk->sk_timer.data = (unsigned long)sk; + icsk->icsk_retransmit_timer.data = + icsk->icsk_delack_timer.data = + sk->sk_timer.data = (unsigned long)sk; + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; } -void tcp_clear_xmit_timers(struct sock *sk) +void inet_csk_clear_xmit_timers(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - tp->pending = 0; - sk_stop_timer(sk, &tp->retransmit_timer); - - tp->ack.pending = 0; - tp->ack.blocked = 0; - sk_stop_timer(sk, &tp->delack_timer); + icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &icsk->icsk_delack_timer); sk_stop_timer(sk, &sk->sk_timer); } +void tcp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); +} + static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; @@ -155,15 +160,15 @@ static int tcp_orphan_retries(struct sock *sk, int alive) /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (tp->retransmits) + if (icsk->icsk_retransmits) dst_negative_advice(&sk->sk_dst_cache); - retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (tp->retransmits >= sysctl_tcp_retries1) { + if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black hole detection. :-( @@ -189,16 +194,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = (tp->rto < TCP_RTO_MAX); + const int alive = (icsk->icsk_rto < TCP_RTO_MAX); retry_until = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) return 1; } } - if (tp->retransmits >= retry_until) { + if (icsk->icsk_retransmits >= retry_until) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -210,26 +215,27 @@ static void tcp_delack_timer(unsigned long data) { struct sock *sk = (struct sock*)data; struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - tp->ack.blocked = 1; + icsk->icsk_ack.blocked = 1; NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out_unlock; } sk_stream_mem_reclaim(sk); - if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) + if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; - if (time_after(tp->ack.timeout, jiffies)) { - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); goto out; } - tp->ack.pending &= ~TCP_ACK_TIMER; + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; @@ -242,16 +248,16 @@ static void tcp_delack_timer(unsigned long data) tp->ucopy.memory = 0; } - if (tcp_ack_scheduled(tp)) { - if (!tp->ack.pingpong) { + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ - tp->ack.ato = min(tp->ack.ato << 1, tp->rto); + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); @@ -294,7 +300,8 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = ((tp->rto<backoff) < TCP_RTO_MAX); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); @@ -317,6 +324,7 @@ static void tcp_probe_timer(struct sock *sk) static void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out) goto out; @@ -351,7 +359,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (tcp_write_timeout(sk)) goto out; - if (tp->retransmits == 0) { + if (icsk->icsk_retransmits == 0) { if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { if (tp->ca_state == TCP_CA_Recovery) @@ -381,10 +389,10 @@ static void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * do not backoff. */ - if (!tp->retransmits) - tp->retransmits=1; - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, - min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); + if (!icsk->icsk_retransmits) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); goto out; } @@ -403,13 +411,13 @@ static void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ - tp->backoff++; - tp->retransmits++; + icsk->icsk_backoff++; + icsk->icsk_retransmits++; out_reset_timer: - tp->rto = min(tp->rto << 1, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - if (tp->retransmits > sysctl_tcp_retries1) + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); + if (icsk->icsk_retransmits > sysctl_tcp_retries1) __sk_dst_reset(sk); out:; @@ -418,32 +426,32 @@ out:; static void tcp_write_timer(unsigned long data) { struct sock *sk = (struct sock*)data; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int event; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later */ - sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20)); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); goto out_unlock; } - if (sk->sk_state == TCP_CLOSE || !tp->pending) + if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; - if (time_after(tp->timeout, jiffies)) { - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); goto out; } - event = tp->pending; - tp->pending = 0; + event = icsk->icsk_pending; + icsk->icsk_pending = 0; switch (event) { - case TCP_TIME_RETRANS: + case ICSK_TIME_RETRANS: tcp_retransmit_timer(sk); break; - case TCP_TIME_PROBE0: + case ICSK_TIME_PROBE0: tcp_probe_timer(sk); break; } @@ -463,8 +471,9 @@ out_unlock: static void tcp_synack_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; @@ -526,8 +535,8 @@ static void tcp_synack_timer(struct sock *sk) } /* Drop this request */ - tcp_synq_unlink(tp, req, reqp); - reqsk_queue_removed(&tp->accept_queue, req); + inet_csk_reqsk_queue_unlink(sk, req, reqp); + reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_free(req); continue; } @@ -541,15 +550,15 @@ static void tcp_synack_timer(struct sock *sk) lopt->clock_hand = i; if (lopt->qlen) - tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); + inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); } -void tcp_delete_keepalive_timer (struct sock *sk) +void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } -void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } @@ -560,9 +569,9 @@ void tcp_set_keepalive(struct sock *sk, int val) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); } @@ -576,7 +585,7 @@ static void tcp_keepalive_timer (unsigned long data) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - tcp_reset_keepalive_timer (sk, HZ/20); + inet_csk_reset_keepalive_timer (sk, HZ/20); goto out; } @@ -587,7 +596,7 @@ static void tcp_keepalive_timer (unsigned long data) if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (tp->linger2 >= 0) { - int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; + const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; if (tmo > 0) { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -634,7 +643,7 @@ static void tcp_keepalive_timer (unsigned long data) sk_stream_mem_reclaim(sk); resched: - tcp_reset_keepalive_timer (sk, elapsed); + inet_csk_reset_keepalive_timer (sk, elapsed); goto out; death: @@ -645,7 +654,7 @@ out: sock_put(sk); } -EXPORT_SYMBOL(tcp_clear_xmit_timers); -EXPORT_SYMBOL(tcp_delete_keepalive_timer); +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(tcp_reset_keepalive_timer); +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4582d9cf4bb..b9c3da34949 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1043,7 +1043,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = tcp_v6_ipv6only(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index af8ad5bb273..b9c7003b7f8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -207,9 +207,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -381,7 +381,7 @@ EXPORT_SYMBOL_GPL(tcp_v6_lookup); * Open request hash tables. */ -static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) +static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) { u32 a, b, c; @@ -401,14 +401,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) return c & (TCP_SYNQ_HSIZE - 1); } -static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, +static struct request_sock *tcp_v6_search_req(const struct sock *sk, struct request_sock ***prevp, __u16 rport, struct in6_addr *raddr, struct in6_addr *laddr, int iif) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; @@ -619,7 +620,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -925,7 +926,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr, + req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, tcp_v6_iif(skb)); if (!req) goto out; @@ -940,7 +941,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -1245,11 +1246,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; struct tcphdr *th = skb->h.th; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr, + req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1278,12 +1278,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -1308,13 +1308,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* * There are no SYN attacks on IPv6, yet... */ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); goto drop; } - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp6_request_sock_ops); @@ -2015,7 +2015,7 @@ static int tcp_v6_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2098,18 +2098,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) unsigned long timer_expires; struct inet_sock *inet = inet_sk(sp); struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; destp = ntohs(inet->dport); srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2130,12 +2132,14 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); } @@ -2227,7 +2231,7 @@ struct proto tcpv6_prot = { .close = tcp_close, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, .destroy = tcp_v6_destroy_sock, -- cgit v1.2.3-70-g09d2 From 0a5578cf8e5e045aaa68643c17ce885426697c6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:41 -0700 Subject: [ICSK]: Generalise tcp_listen_{start,stop} This also moved inet_iif from tcp to inet_hashtables.h, as it is needed by the inet_lookup callers, perhaps this needs a bit of polishing, but for now seems fine. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 6 ++++++ include/net/sock.h | 1 + include/net/tcp.h | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 38 +++++++++++++++++++++----------------- net/ipv4/tcp_ipv4.c | 6 +----- net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 33 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f0c21c07f89..646b6ea7fe2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -280,6 +281,11 @@ out: wake_up(&hashinfo->lhash_wait); } +static inline int inet_iif(const struct sk_buff *skb) +{ + return ((struct rtable *)skb->dst)->rt_iif; +} + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, diff --git a/include/net/sock.h b/include/net/sock.h index 48cc337a656..8678313a22b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -558,6 +558,7 @@ struct proto { kmem_cache_t *twsk_slab; unsigned int twsk_obj_size; + atomic_t *orphan_count; struct request_sock_ops *rsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index dd9a5a288f8..68f1ec1c583 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -860,7 +860,7 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void tcp_destroy_sock(struct sock *sk); +extern void inet_csk_destroy_sock(struct sock *sk); /* @@ -987,7 +987,7 @@ static __inline__ void tcp_done(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); } static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7137e6420d6..f691058cf59 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -202,7 +202,7 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - err = tcp_listen_start(sk); + err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); if (err) goto out; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 581016a6a93..a1f812159ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -273,6 +273,8 @@ DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); atomic_t tcp_orphan_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(tcp_orphan_count); + int sysctl_tcp_mem[3]; int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; @@ -454,12 +456,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } - -int tcp_listen_start(struct sock *sk) +int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) { struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); if (rc != 0) return rc; @@ -488,12 +489,13 @@ int tcp_listen_start(struct sock *sk) return -EADDRINUSE; } +EXPORT_SYMBOL_GPL(inet_csk_listen_start); + /* * This routine closes sockets which have been at least partially * opened, but not yet accepted. */ - -static void tcp_listen_stop (struct sock *sk) +static void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; @@ -524,13 +526,13 @@ static void tcp_listen_stop (struct sock *sk) BUG_TRAP(!sock_owned_by_user(child)); sock_hold(child); - tcp_disconnect(child, O_NONBLOCK); + sk->sk_prot->disconnect(child, O_NONBLOCK); sock_orphan(child); - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); - tcp_destroy_sock(child); + inet_csk_destroy_sock(child); bh_unlock_sock(child); local_bh_enable(); @@ -542,6 +544,8 @@ static void tcp_listen_stop (struct sock *sk) BUG_TRAP(!sk->sk_ack_backlog); } +EXPORT_SYMBOL_GPL(inet_csk_listen_stop); + static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; @@ -1561,7 +1565,7 @@ void tcp_shutdown(struct sock *sk, int how) * can assume the socket waitqueue is inactive and nobody will * try to jump onto it. */ -void tcp_destroy_sock(struct sock *sk) +void inet_csk_destroy_sock(struct sock *sk) { BUG_TRAP(sk->sk_state == TCP_CLOSE); BUG_TRAP(sock_flag(sk, SOCK_DEAD)); @@ -1580,7 +1584,7 @@ void tcp_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(&tcp_orphan_count); + atomic_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -1596,7 +1600,7 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); /* Special case. */ - tcp_listen_stop(sk); + inet_csk_listen_stop(sk); goto adjudge_to_death; } @@ -1704,7 +1708,7 @@ adjudge_to_death: if (tmo > TCP_TIMEWAIT_LEN) { inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); } else { - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; } @@ -1712,7 +1716,7 @@ adjudge_to_death: } if (sk->sk_state != TCP_CLOSE) { sk_stream_mem_reclaim(sk); - if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || + if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans || (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { if (net_ratelimit()) @@ -1723,10 +1727,10 @@ adjudge_to_death: NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); } } - atomic_inc(&tcp_orphan_count); + atomic_inc(sk->sk_prot->orphan_count); if (sk->sk_state == TCP_CLOSE) - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */ out: @@ -1757,7 +1761,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* ABORT function of RFC793 */ if (old_state == TCP_LISTEN) { - tcp_listen_stop(sk); + inet_csk_listen_stop(sk); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -2253,7 +2257,7 @@ void __init tcp_init(void) } EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_destroy_sock); +EXPORT_SYMBOL(inet_csk_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); EXPORT_SYMBOL(tcp_getsockopt); EXPORT_SYMBOL(tcp_ioctl); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2f605b9e6b6..b966102b9f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -431,11 +431,6 @@ failure: return err; } -static inline int inet_iif(const struct sk_buff *skb) -{ - return ((struct rtable *)skb->dst)->rt_iif; -} - /* * This routine does path mtu discovery as defined in RFC1191. */ @@ -1993,6 +1988,7 @@ struct proto tcp_prot = { .get_port = tcp_v4_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, .sockets_allocated = &tcp_sockets_allocated, + .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b9c7003b7f8..0b51ec310eb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2248,6 +2248,7 @@ struct proto tcpv6_prot = { .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, + .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, -- cgit v1.2.3-70-g09d2 From 210a9ebef2d1bd32d9e9d81c84d538e237769cdb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:20:54 -0700 Subject: [NETFILTER]: ip{6}_queue: prevent unregistration race with nfnetlink_queue Since nfnetlink_queue can override ip{6}_queue as queue handlers, we can no longer blindly unregister whoever is registered for PF_INET[6], but only unregister ourselves. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index cfc886f382a..629de649f13 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -692,7 +692,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handler(PF_INET); + nf_unregister_queue_handlers(&ipq_enqueue_packet); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5af4cee93d9..56ffec3568f 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -687,7 +687,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handler(PF_INET6); + nf_unregister_queue_handlers(&ipq_enqueue_packet); synchronize_net(); ipq_flush(NF_DROP); -- cgit v1.2.3-70-g09d2 From bbd86b9fc469b7e91dc7444e6abb8930811d79cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:11 -0700 Subject: [NETFILTER]: add /proc/net/netfilter interface to nf_queue This patch adds a /proc/net/netfilter/nf_queue file, similar to the recently-added /proc/net/netfilter/nf_log. It indicates which queue handler is registered to which protocol family. This is useful since there are now multiple queue handlers in the treee (ip[6]_queue, nfnetlink_queue). Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 13 +++-- net/ipv4/netfilter/ip_queue.c | 9 +++- net/ipv6/netfilter/ip6_queue.c | 9 +++- net/netfilter/nf_log.c | 1 + net/netfilter/nf_queue.c | 106 ++++++++++++++++++++++++++++++++-------- net/netfilter/nfnetlink_queue.c | 12 +++-- 6 files changed, 116 insertions(+), 34 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 815583af06c..bf430fcbe36 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -225,13 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, int *len); /* Packet queuing */ -typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, - unsigned int queuenum, void *data); +struct nf_queue_handler { + int (*outfn)(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data); + void *data; + char *name; +}; extern int nf_register_queue_handler(int pf, - nf_queue_outfn_t outfn, void *data); + struct nf_queue_handler *qh); extern int nf_unregister_queue_handler(int pf); -extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); +extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 629de649f13..1c49833e00a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -656,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) } #endif /* CONFIG_PROC_FS */ +static struct nf_queue_handler nfqh = { + .name = "ip_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -684,7 +689,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET, &nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); goto cleanup_sysctl; @@ -692,7 +697,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 56ffec3568f..7ecb91e24a3 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -652,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) return len; } +static struct nf_queue_handler nfqh = { + .name = "ip6_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -679,7 +684,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET6, &nfqh); if (status < 0) { printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); goto cleanup_sysctl; @@ -687,7 +692,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ec58c4d2c66..31a9d63921d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5586f843ed4..8a67bde8b64 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" @@ -14,17 +15,12 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static struct nf_queue_handler_t { - nf_queue_outfn_t outfn; - void *data; -} queue_handler[NPROTO]; - +static struct nf_queue_handler *queue_handler[NPROTO]; static struct nf_queue_rerouter *queue_rerouter; static DEFINE_RWLOCK(queue_handler_lock); - -int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) +int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) { int ret; @@ -32,11 +28,10 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) return -EINVAL; write_lock_bh(&queue_handler_lock); - if (queue_handler[pf].outfn) + if (queue_handler[pf]) ret = -EBUSY; else { - queue_handler[pf].outfn = outfn; - queue_handler[pf].data = data; + queue_handler[pf] = qh; ret = 0; } write_unlock_bh(&queue_handler_lock); @@ -52,8 +47,7 @@ int nf_unregister_queue_handler(int pf) return -EINVAL; write_lock_bh(&queue_handler_lock); - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; + queue_handler[pf] = NULL; write_unlock_bh(&queue_handler_lock); return 0; @@ -85,16 +79,14 @@ int nf_unregister_queue_rerouter(int pf) } EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); -void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +void nf_unregister_queue_handlers(struct nf_queue_handler *qh) { int pf; write_lock_bh(&queue_handler_lock); for (pf = 0; pf < NPROTO; pf++) { - if (queue_handler[pf].outfn == outfn) { - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - } + if (queue_handler[pf] == qh) + queue_handler[pf] = NULL; } write_unlock_bh(&queue_handler_lock); } @@ -121,7 +113,7 @@ int nf_queue(struct sk_buff **skb, /* QUEUE == DROP if noone is waiting, to be safe. */ read_lock(&queue_handler_lock); - if (!queue_handler[pf].outfn) { + if (!queue_handler[pf]->outfn) { read_unlock(&queue_handler_lock); kfree_skb(*skb); return 1; @@ -162,8 +154,8 @@ int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queuenum, - queue_handler[pf].data); + status = queue_handler[pf]->outfn(*skb, info, queuenum, + queue_handler[pf]->data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -259,13 +251,87 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, } EXPORT_SYMBOL(nf_reinject); +#ifdef CONFIG_PROC_FS +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + +} + +static int seq_show(struct seq_file *s, void *v) +{ + int ret; + loff_t *pos = v; + struct nf_queue_handler *qh; + + read_lock_bh(&queue_handler_lock); + qh = queue_handler[*pos]; + if (!qh) + ret = seq_printf(s, "%2lld NONE\n", *pos); + else + ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); + read_unlock_bh(&queue_handler_lock); + + return ret; +} + +static struct seq_operations nfqueue_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nfqueue_seq_ops); +} + +static struct file_operations nfqueue_file_ops = { + .owner = THIS_MODULE, + .open = nfqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* PROC_FS */ + + int __init netfilter_queue_init(void) { +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); if (!queue_rerouter) return -ENOMEM; +#ifdef CONFIG_PROC_FS + pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); + if (!pde) { + kfree(queue_rerouter); + return -1; + } + pde->proc_fops = &nfqueue_file_ops; +#endif memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); return 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index bf9223084b4..741686ff71d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -845,6 +845,11 @@ static const int nfqa_cfg_min[NFQA_CFG_MAX] = { [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), }; +static struct nf_queue_handler nfqh = { + .name = "nf_queue", + .outfn = &nfqnl_enqueue_packet, +}; + static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -890,10 +895,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, case NFQNL_CFG_CMD_PF_BIND: QDEBUG("registering queue handler for pf=%u\n", ntohs(cmd->pf)); - ret = nf_register_queue_handler(ntohs(cmd->pf), - nfqnl_enqueue_packet, - NULL); - + ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh); break; case NFQNL_CFG_CMD_PF_UNBIND: QDEBUG("unregistering queue handler for pf=%u\n", @@ -1098,7 +1100,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(nfqnl_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -- cgit v1.2.3-70-g09d2 From 295ff7edb8f72b77d524759266f7524deae379b3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:44:40 -0700 Subject: [TIMEWAIT]: Introduce inet_timewait_death_row That groups all of the tables and variables associated to the TCP timewait schedulling/recycling/killing code, that now can be isolated from the TCP specific code and used by other transport protocols, such as DCCP. Next changeset will move this code to net/ipv4/inet_timewait_sock.c Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 57 ++++++++- include/net/tcp.h | 36 +----- net/ipv4/proc.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp.c | 4 +- net/ipv4/tcp_ipv4.c | 11 +- net/ipv4/tcp_minisocks.c | 256 ++++++++++++++++++++------------------- net/ipv6/tcp_ipv6.c | 9 +- 8 files changed, 207 insertions(+), 172 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index e00861b1669..a7e8052e2fb 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,13 +19,69 @@ #include #include +#include #include +#include #include #include #include +struct inet_hashinfo; + +#define INET_TWDR_RECYCLE_SLOTS_LOG 5 +#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) + +/* + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. + */ +#if HZ <= 16 || HZ > 4096 +# error Unsupported: HZ <= 16 or HZ > 4096 +#elif HZ <= 32 +# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 64 +# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 128 +# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 256 +# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 512 +# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 1024 +# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 2048 +# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#else +# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#endif + +/* TIME_WAIT reaping mechanism. */ +#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ + +#define INET_TWDR_TWKILL_QUOTA 100 + +struct inet_timewait_death_row { + /* Short-time timewait calendar */ + int twcal_hand; + int twcal_jiffie; + struct timer_list twcal_timer; + struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; + + spinlock_t death_lock; + int tw_count; + int period; + u32 thread_slots; + struct work_struct twkill_work; + struct timer_list tw_timer; + int slot; + struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; + struct inet_hashinfo *hashinfo; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + #if (BITS_PER_LONG == 64) #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #else @@ -33,7 +89,6 @@ #endif struct inet_bind_bucket; -struct inet_hashinfo; /* * This is a TIME_WAIT sock. It works around the memory consumption diff --git a/include/net/tcp.h b/include/net/tcp.h index 077db859ae0..4c4cd4fb1ed 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -42,9 +43,9 @@ extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; -extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); +extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); * timestamps. It must be less than * minimal timewait lifetime. */ - -#define TCP_TW_RECYCLE_SLOTS_LOG 5 -#define TCP_TW_RECYCLE_SLOTS (1< 4sec, it is "slow" path, no recycling is required, - so that we select tick to get range about 4 seconds. - */ - -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) -#else -# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) -#endif /* * TCP option */ @@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ +extern struct inet_timewait_death_row tcp_death_row; + /* sysctl variables for tcp */ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_tw_recycle; extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; @@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_tw_buckets; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 912bbcc7f41..3eadbb27187 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), - tcp_tw_count, atomic_read(&tcp_sockets_allocated), + tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e3289453241..ce47a345ecc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -259,7 +259,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_TCP_MAX_TW_BUCKETS, .procname = "tcp_max_tw_buckets", - .data = &sysctl_tcp_max_tw_buckets, + .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -363,7 +363,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_TCP_TW_RECYCLE, .procname = "tcp_tw_recycle", - .data = &sysctl_tcp_tw_recycle, + .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bda522d25c..0eed64a1991 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2109,12 +2109,12 @@ void __init tcp_init(void) if (order >= 4) { sysctl_local_port_range[0] = 32768; sysctl_local_port_range[1] = 61000; - sysctl_tcp_max_tw_buckets = 180000; + tcp_death_row.sysctl_max_tw_buckets = 180000; sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_max_syn_backlog = 1024; } else if (order < 3) { sysctl_local_port_range[0] = 1024 * (3 - order); - sysctl_tcp_max_tw_buckets >>= (3 - order); + tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b966102b9f3..83f72346274 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,7 +199,7 @@ unique: NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -291,7 +291,7 @@ ok: spin_unlock(&head->lock); if (tw) { - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row);; inet_twsk_put(tw); } @@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (sysctl_tcp_tw_recycle && + if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { struct inet_peer *peer = rt_get_peer(rt); @@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * are made in the function processing timewait state. */ if (tmp_opt.saw_tstamp && - sysctl_tcp_tw_recycle && + tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { @@ -1305,7 +1305,8 @@ do_time_wait: ntohs(th->dest), inet_iif(skb)); if (sk2) { - tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_deschedule((struct inet_timewait_sock *)sk, + &tcp_death_row); inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2d95afe5b39..81b9a52c50c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -35,13 +35,37 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_tw_recycle; -int sysctl_tcp_max_tw_buckets = NR_FILE*2; +/* New-style handling of TIME_WAIT sockets. */ + +static void inet_twdr_hangman(unsigned long data); +static void inet_twdr_twkill_work(void *data); +static void inet_twdr_twcal_tick(unsigned long data); int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; -static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); +struct inet_timewait_death_row tcp_death_row = { + .sysctl_max_tw_buckets = NR_FILE * 2, + .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, + .death_lock = SPIN_LOCK_UNLOCKED, + .hashinfo = &tcp_hashinfo, + .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, + (unsigned long)&tcp_death_row), + .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, + inet_twdr_twkill_work, + &tcp_death_row), +/* Short-time timewait calendar */ + + .twcal_hand = -1, + .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, + (unsigned long)&tcp_death_row), +}; + +EXPORT_SYMBOL_GPL(tcp_death_row); + +static void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -52,10 +76,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return (seq == e_win && seq == end_seq); } -/* New-style handling of TIME_WAIT sockets. */ - -int tcp_tw_count; - /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -132,7 +152,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); return TCP_TW_RST; } @@ -151,11 +171,11 @@ kill_with_rst: * do not undertsnad recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && - sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && + tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) - tcp_tw_schedule(tw, tw->tw_timeout); + inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); else - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -188,12 +208,12 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); return TCP_TW_SUCCESS; } } - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -243,7 +263,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); /* Send ACK. Note, we do not put the bucket, * it will be released by caller. @@ -263,10 +283,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; - if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tp->af_specific->remember_stamp(sk); - if (tcp_tw_count < sysctl_tcp_max_tw_buckets) + if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); if (tw != NULL) { @@ -306,7 +326,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - tcp_tw_schedule(tw, timeo); + inet_twsk_schedule(tw, &tcp_death_row, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -321,26 +341,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcp_done(sk); } -/* Kill off TIME_WAIT sockets once their lifetime has expired. */ -static int tcp_tw_death_row_slot; - -static void tcp_twkill(unsigned long); - -/* TIME_WAIT reaping mechanism. */ -#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ -#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) - -#define TCP_TWKILL_QUOTA 100 - -static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS]; -static DEFINE_SPINLOCK(tw_death_lock); -static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0); -static void twkill_work(void *); -static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL); -static u32 twkill_thread_slots; - /* Returns non-zero if quota exceeded. */ -static int tcp_do_twkill_work(int slot, unsigned int quota) +static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, + const int slot) { struct inet_timewait_sock *tw; struct hlist_node *node; @@ -356,19 +359,19 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { + inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); - spin_unlock(&tw_death_lock); - __inet_twsk_kill(tw, &tcp_hashinfo); + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); inet_twsk_put(tw); killed++; - spin_lock(&tw_death_lock); - if (killed > quota) { + spin_lock(&twdr->death_lock); + if (killed > INET_TWDR_TWKILL_QUOTA) { ret = 1; break; } - /* While we dropped tw_death_lock, another cpu may have + /* While we dropped twdr->death_lock, another cpu may have * killed off the next TW bucket in the list, therefore * do a fresh re-read of the hlist head node with the * lock reacquired. We still use the hlist traversal @@ -377,67 +380,68 @@ rescan: goto rescan; } - tcp_tw_count -= killed; + twdr->tw_count -= killed; NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); return ret; } -static void tcp_twkill(unsigned long dummy) +static void inet_twdr_hangman(unsigned long data) { - int need_timer, ret; + struct inet_timewait_death_row *twdr; + int unsigned need_timer; - spin_lock(&tw_death_lock); + twdr = (struct inet_timewait_death_row *)data; + spin_lock(&twdr->death_lock); - if (tcp_tw_count == 0) + if (twdr->tw_count == 0) goto out; need_timer = 0; - ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA); - if (ret) { - twkill_thread_slots |= (1 << tcp_tw_death_row_slot); + if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { + twdr->thread_slots |= (1 << twdr->slot); mb(); - schedule_work(&tcp_twkill_work); + schedule_work(&twdr->twkill_work); need_timer = 1; } else { /* We purged the entire slot, anything left? */ - if (tcp_tw_count) + if (twdr->tw_count) need_timer = 1; } - tcp_tw_death_row_slot = - ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); + twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); if (need_timer) - mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD); + mod_timer(&twdr->tw_timer, jiffies + twdr->period); out: - spin_unlock(&tw_death_lock); + spin_unlock(&twdr->death_lock); } extern void twkill_slots_invalid(void); -static void twkill_work(void *dummy) +static void inet_twdr_twkill_work(void *data) { + struct inet_timewait_death_row *twdr = data; int i; - if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8)) + if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) twkill_slots_invalid(); - while (twkill_thread_slots) { - spin_lock_bh(&tw_death_lock); - for (i = 0; i < TCP_TWKILL_SLOTS; i++) { - if (!(twkill_thread_slots & (1 << i))) + while (twdr->thread_slots) { + spin_lock_bh(&twdr->death_lock); + for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { + if (!(twdr->thread_slots & (1 << i))) continue; - while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) { + while (inet_twdr_do_twkill_work(twdr, i) != 0) { if (need_resched()) { - spin_unlock_bh(&tw_death_lock); + spin_unlock_bh(&twdr->death_lock); schedule(); - spin_lock_bh(&tw_death_lock); + spin_lock_bh(&twdr->death_lock); } } - twkill_thread_slots &= ~(1 << i); + twdr->thread_slots &= ~(1 << i); } - spin_unlock_bh(&tw_death_lock); + spin_unlock_bh(&twdr->death_lock); } } @@ -446,28 +450,22 @@ static void twkill_work(void *dummy) */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void tcp_tw_deschedule(struct inet_timewait_sock *tw) +void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr) { - spin_lock(&tw_death_lock); + spin_lock(&twdr->death_lock); if (inet_twsk_del_dead_node(tw)) { inet_twsk_put(tw); - if (--tcp_tw_count == 0) - del_timer(&tcp_tw_timer); + if (--twdr->tw_count == 0) + del_timer(&twdr->tw_timer); } - spin_unlock(&tw_death_lock); - __inet_twsk_kill(tw, &tcp_hashinfo); + spin_unlock(&twdr->death_lock); + __inet_twsk_kill(tw, twdr->hashinfo); } -/* Short-time timewait calendar */ - -static int tcp_twcal_hand = -1; -static int tcp_twcal_jiffie; -static void tcp_twcal_tick(unsigned long); -static struct timer_list tcp_twcal_timer = - TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); -static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; - -static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) +static void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo) { struct hlist_head *list; int slot; @@ -496,100 +494,106 @@ static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) * is greater than TS tick!) and detect old duplicates with help * of PAWS. */ - slot = (timeo + (1<> TCP_TW_RECYCLE_TICK; + slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - spin_lock(&tw_death_lock); + spin_lock(&twdr->death_lock); /* Unlink it, if it was scheduled */ if (inet_twsk_del_dead_node(tw)) - tcp_tw_count--; + twdr->tw_count--; else atomic_inc(&tw->tw_refcnt); - if (slot >= TCP_TW_RECYCLE_SLOTS) { + if (slot >= INET_TWDR_RECYCLE_SLOTS) { /* Schedule to slow timer */ if (timeo >= TCP_TIMEWAIT_LEN) { - slot = TCP_TWKILL_SLOTS-1; + slot = INET_TWDR_TWKILL_SLOTS - 1; } else { - slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD; - if (slot >= TCP_TWKILL_SLOTS) - slot = TCP_TWKILL_SLOTS-1; + slot = (timeo + twdr->period - 1) / twdr->period; + if (slot >= INET_TWDR_TWKILL_SLOTS) + slot = INET_TWDR_TWKILL_SLOTS - 1; } tw->tw_ttd = jiffies + timeo; - slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); - list = &tcp_tw_death_row[slot]; + slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); + list = &twdr->cells[slot]; } else { - tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); - - if (tcp_twcal_hand < 0) { - tcp_twcal_hand = 0; - tcp_twcal_jiffie = jiffies; - tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); + + if (twdr->twcal_hand < 0) { + twdr->twcal_hand = 0; + twdr->twcal_jiffie = jiffies; + twdr->twcal_timer.expires = twdr->twcal_jiffie + + (slot << INET_TWDR_RECYCLE_TICK); + add_timer(&twdr->twcal_timer); } else { - if (time_after(tcp_twcal_timer.expires, jiffies + (slot<twcal_timer.expires, + jiffies + (slot << INET_TWDR_RECYCLE_TICK))) + mod_timer(&twdr->twcal_timer, + jiffies + (slot << INET_TWDR_RECYCLE_TICK)); + slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); } - list = &tcp_twcal_row[slot]; + list = &twdr->twcal_row[slot]; } hlist_add_head(&tw->tw_death_node, list); - if (tcp_tw_count++ == 0) - mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); - spin_unlock(&tw_death_lock); + if (twdr->tw_count++ == 0) + mod_timer(&twdr->tw_timer, jiffies + twdr->period); + spin_unlock(&twdr->death_lock); } -void tcp_twcal_tick(unsigned long dummy) +void inet_twdr_twcal_tick(unsigned long data) { + struct inet_timewait_death_row *twdr; int n, slot; unsigned long j; unsigned long now = jiffies; int killed = 0; int adv = 0; - spin_lock(&tw_death_lock); - if (tcp_twcal_hand < 0) + twdr = (struct inet_timewait_death_row *)data; + + spin_lock(&twdr->death_lock); + if (twdr->twcal_hand < 0) goto out; - slot = tcp_twcal_hand; - j = tcp_twcal_jiffie; + slot = twdr->twcal_hand; + j = twdr->twcal_jiffie; - for (n=0; ntwcal_row[slot]) { __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, &tcp_hashinfo); + __inet_twsk_kill(tw, twdr->hashinfo); inet_twsk_put(tw); killed++; } } else { if (!adv) { adv = 1; - tcp_twcal_jiffie = j; - tcp_twcal_hand = slot; + twdr->twcal_jiffie = j; + twdr->twcal_hand = slot; } - if (!hlist_empty(&tcp_twcal_row[slot])) { - mod_timer(&tcp_twcal_timer, j); + if (!hlist_empty(&twdr->twcal_row[slot])) { + mod_timer(&twdr->twcal_timer, j); goto out; } } - j += (1<twcal_hand = -1; out: - if ((tcp_tw_count -= killed) == 0) - del_timer(&tcp_tw_timer); + if ((twdr->tw_count -= killed) == 0) + del_timer(&twdr->tw_timer); NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); - spin_unlock(&tw_death_lock); + spin_unlock(&twdr->death_lock); } /* This is not only more efficient than what we used to do, it eliminates @@ -929,4 +933,4 @@ EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_timewait_state_process); -EXPORT_SYMBOL(tcp_tw_deschedule); +EXPORT_SYMBOL(inet_twsk_deschedule); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0b51ec310eb..1c21ad66cfa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -521,7 +521,7 @@ unique: NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -611,7 +611,7 @@ ok: spin_unlock(&head->lock); if (tw) { - tcp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); } @@ -1820,8 +1820,9 @@ do_time_wait: sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { - tcp_tw_deschedule((struct inet_timewait_sock *)sk); - inet_twsk_put((struct inet_timewait_sock *)sk); + struct inet_timewait_sock *tw = inet_twsk(sk); + inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_put(tw); sk = sk2; goto process; } -- cgit v1.2.3-70-g09d2 From 64ce207306debd7157f47282be94770407bec01c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:50:53 -0700 Subject: [NET]: Make NETDEBUG pure printk wrappers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++---- net/dccp/input.c | 2 +- net/dccp/options.c | 8 ++++---- net/ipv4/esp4.c | 12 ++++++------ net/ipv4/icmp.c | 12 +++++------- net/ipv4/igmp.c | 2 +- net/ipv4/ip_fragment.c | 6 +++--- net/ipv4/ip_output.c | 2 +- net/ipv4/ipcomp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 11 +++++------ net/ipv4/udp.c | 32 ++++++++++++++++---------------- net/ipv6/ah6.c | 13 ++++++------- net/ipv6/datagram.c | 4 ++-- net/ipv6/esp6.c | 3 +-- net/ipv6/exthdrs.c | 8 ++++---- net/ipv6/icmp.c | 20 +++++++------------- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter.c | 3 +-- net/ipv6/raw.c | 3 +-- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 7 +++---- 21 files changed, 75 insertions(+), 89 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/sock.h b/include/net/sock.h index 8678313a22b..065df67b642 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1316,11 +1316,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); */ #if 0 -#define NETDEBUG(x) do { } while (0) -#define LIMIT_NETDEBUG(x) do {} while(0) +#define NETDEBUG(fmt, args...) do { } while (0) +#define LIMIT_NETDEBUG(fmt, args...) do { } while(0) #else -#define NETDEBUG(x) do { x; } while (0) -#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) +#define NETDEBUG(fmt, args...) printk(fmt,##args) +#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) #endif /* diff --git a/net/dccp/input.c b/net/dccp/input.c index 76c3401e93a..bdaecde0bde 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -161,7 +161,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: acknowledgeable packets buffer full!\n"); ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); diff --git a/net/dccp/options.c b/net/dccp/options.c index 9ca32cba83a..5bf997683a1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -231,7 +231,7 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb, unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert %d option!\n", option); return; } @@ -299,7 +299,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert elapsed time!\n"); return; } @@ -335,7 +335,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert ACK Vector!\n"); return; } @@ -412,7 +412,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *s unsigned char *to; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert timestamp echo!\n"); return; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ba57446d5d1..b31ffc5053d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", - ntohl(esph->spi), ntohl(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", + ntohl(esph->spi), ntohl(iph->daddr)); xfrm_state_put(x); } @@ -395,10 +395,10 @@ static int esp_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_tfm_alg_digestsize(esp->auth.tfm)) { - NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8)); + NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_tfm_alg_digestsize(esp->auth.tfm), + aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index badfc584997..25f66b750fd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); } else { info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); @@ -640,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG( - printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " "Route Failed.\n", - NIPQUAD(iph->daddr))); + NIPQUAD(iph->daddr)); break; default: break; @@ -936,7 +934,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n"); case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) goto error; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5088f90835a..44607f4767b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -904,7 +904,7 @@ int igmp_rcv(struct sk_buff *skb) case IGMP_MTRACE_RESP: break; default: - NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); + NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); } in_dev_put(in_dev); kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb377ae1530..1ac64c0c5b3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) return ip_frag_intern(hash, qp); out_nomem: - LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); + LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); return NULL; } @@ -625,8 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) return head; out_nomem: - LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " - "queue %p\n", qp)); + LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp); goto out_fail; out_oversize: if (net_ratelimit()) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 633945d27ac..19f24f778dc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -573,7 +573,7 @@ slow_path: */ if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { - NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); + NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); err = -ENOMEM; goto fail; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 7ded6e60f43..dcb7ee6c485 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -214,8 +214,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) spi, IPPROTO_COMP, AF_INET); if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", - spi, NIPQUAD(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", + spi, NIPQUAD(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 83f72346274..32a0ebc589d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -990,11 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " - "request from %u.%u." - "%u.%u/%u\n", - NIPQUAD(saddr), - ntohs(skb->h.th->source))); + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " + "request from %u.%u.%u.%u/%u\n", + NIPQUAD(saddr), + ntohs(skb->h.th->source)); dst_release(dst); goto drop_and_free; } @@ -1118,7 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) skb->nh.iph->daddr, skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); skb->ip_summed = CHECKSUM_NONE; } if (skb->len <= 76) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a8135e1f528..3a5bbbe7dd8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -629,7 +629,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); err = -EINVAL; goto out; } @@ -694,7 +694,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); return -EINVAL; } @@ -1103,7 +1103,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -1182,13 +1182,13 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - len, - NIPQUAD(daddr), - ntohs(uh->dest))); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + len, + NIPQUAD(daddr), + ntohs(uh->dest)); no_header: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1199,12 +1199,12 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen)); + LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); drop: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 986fdfdccbc..0ebfad907a0 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -131,10 +131,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) case NEXTHDR_HOP: case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG(printk( + LIMIT_NETDEBUG( KERN_WARNING "overrun %sopts\n", nexthdr == NEXTHDR_HOP ? - "hop" : "dest")); + "hop" : "dest"); return -EINVAL; } break; @@ -293,8 +293,7 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc skb_push(skb, skb->data - skb->nh.raw); ahp->icv(ahp, skb, ah->auth_data); if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { - LIMIT_NETDEBUG( - printk(KERN_WARNING "ipsec ah authentication error\n")); + LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); x->stats.integrity_failed++; goto free_out; } @@ -332,9 +331,9 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - ntohl(ah->spi), NIP6(iph->daddr))); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" + "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + ntohl(ah->spi), NIP6(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 761984f3bd9..01468fab3d3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -589,8 +589,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; default: - LIMIT_NETDEBUG( - printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type)); + LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; break; }; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 324db62515a..e8bff9d3d96 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -212,8 +212,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru padlen = nexthdr[0]; if (padlen+2 >= elen) { - LIMIT_NETDEBUG( - printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen)); + LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen); ret = -EINVAL; goto out; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e0839eafc3a..5be6da2584e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -424,8 +424,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) IP6CB(skb)->ra = optoff; return 1; } - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1])); + LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", + skb->nh.raw[optoff+1]); kfree_skb(skb); return 0; } @@ -437,8 +437,8 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); + LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + skb->nh.raw[optoff+1]); IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); goto drop; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ee9f1d36346..ff685f229b6 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -332,8 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, * for now we don't know that. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); return; } @@ -341,8 +340,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); return; } @@ -393,8 +391,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmp: len problem\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); goto out_dst_release; } @@ -584,17 +581,15 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ICMPv6 hw checksum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); skb->ip_summed = CHECKSUM_NONE; } } if (skb->ip_summed == CHECKSUM_NONE) { if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb_checksum(skb, 0, skb->len, 0))) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", - NIP6(*saddr), NIP6(*daddr))); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + NIP6(*saddr), NIP6(*daddr)); goto discard_it; } } @@ -670,8 +665,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) break; default: - LIMIT_NETDEBUG( - printk(KERN_DEBUG "icmpv6: msg of unknown type\n")); + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); /* informational */ if (type & ICMPV6_INFOMSG_MASK) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 00f85148b85..01ef94f7c7f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -625,7 +625,7 @@ slow_path: */ if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { - NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); + NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index c8daef97cf5..f8626ebf90f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -28,8 +28,7 @@ int ip6_route_me_harder(struct sk_buff *skb) if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 766e1c7179a..7a5863298f3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -343,8 +343,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, skb->len, inet->num, skb->csum)) { - LIMIT_NETDEBUG( - printk(KERN_DEBUG "raw v6 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1c21ad66cfa..08c55b02470 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1564,7 +1564,7 @@ static int tcp_v6_checksum_init(struct sk_buff *skb) if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) return 0; - LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n")); + LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); } if (skb->len <= 76) { if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2ffe34cc2ef..c348307e577 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -478,8 +478,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) /* RFC 2460 section 8.1 says that we SHOULD log this error. Well, it is reasonable. */ - LIMIT_NETDEBUG( - printk(KERN_INFO "IPv6: udp checksum is 0\n")); + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); goto discard; } @@ -494,7 +493,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) if (skb->ip_summed==CHECKSUM_HW) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n"); skb->ip_summed = CHECKSUM_NONE; } } @@ -826,7 +825,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); err = -EINVAL; goto out; } -- cgit v1.2.3-70-g09d2 From 6687e988d9aeaccad6774e6a8304f681f3ec0a03 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 04:03:31 -0300 Subject: [ICSK]: Move TCP congestion avoidance members to icsk This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(), minimal renaming/moving done in this changeset to ease review. Most of it is just changes of struct tcp_sock * to struct sock * parameters. With this we move to a state closer to two interesting goals: 1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used for any INET transport protocol that has struct inet_hashinfo and are derived from struct inet_connection_sock. Keeps the userspace API, that will just not display DCCP sockets, while newer versions of tools can support DCCP. 2. INET generic transport pluggable Congestion Avoidance infrastructure, using the current TCP CA infrastructure with DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 21 +--- include/net/inet_connection_sock.h | 15 +++ include/net/tcp.h | 74 ++++++------ net/ipv4/inet_connection_sock.c | 3 +- net/ipv4/tcp.c | 12 +- net/ipv4/tcp_bic.c | 46 ++++---- net/ipv4/tcp_cong.c | 44 +++++--- net/ipv4/tcp_diag.c | 16 +-- net/ipv4/tcp_highspeed.c | 17 +-- net/ipv4/tcp_htcp.c | 53 +++++---- net/ipv4/tcp_hybla.c | 31 +++--- net/ipv4/tcp_input.c | 223 +++++++++++++++++++++---------------- net/ipv4/tcp_ipv4.c | 9 +- net/ipv4/tcp_minisocks.c | 5 +- net/ipv4/tcp_output.c | 36 +++--- net/ipv4/tcp_scalable.c | 6 +- net/ipv4/tcp_timer.c | 26 +++-- net/ipv4/tcp_vegas.c | 44 ++++---- net/ipv4/tcp_westwood.c | 58 +++++----- net/ipv6/tcp_ipv6.c | 7 +- 20 files changed, 412 insertions(+), 334 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 62009684074..ac4ca44c75c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,19 +258,15 @@ struct tcp_sock { __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ - __u8 ca_state; /* State of fast-retransmit machine */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 nonagle; /* Disable Nagle algorithm? */ - /* ONE BYTE HOLE, TRY TO PACK */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ @@ -311,8 +307,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 probes_out; /* unanswered 0 window probes */ - __u8 ecn_flags; /* ECN status bits. */ + __u16 advmss; /* Advertised MSS */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ __u32 sacked_out; /* SACK'd packets */ @@ -327,7 +322,7 @@ struct tcp_sock { __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ __u8 urg_mode; /* In urgent mode */ - /* ONE BYTE HOLE, TRY TO PACK! */ + __u8 ecn_flags; /* ECN status bits. */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ @@ -351,11 +346,6 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; - - /* Pluggable TCP congestion control hook */ - struct tcp_congestion_ops *ca_ops; - u32 ca_priv[16]; -#define TCP_CA_PRIV_SIZE (16*sizeof(u32)) }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -static inline void *tcp_ca(const struct tcp_sock *tp) -{ - return (void *) tp->ca_priv; -} - #endif #endif /* _LINUX_TCP_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index bec19d5cff2..4d7e708c07d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -27,6 +27,7 @@ struct inet_bind_bucket; struct inet_hashinfo; +struct tcp_congestion_ops; /** inet_connection_sock - INET connection oriented sock * @@ -35,10 +36,13 @@ struct inet_hashinfo; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -50,10 +54,14 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! */ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -65,6 +73,8 @@ struct inet_connection_sock { __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority); diff --git a/include/net/tcp.h b/include/net/tcp.h index d489ac548e4..0b3f7294c5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,29 +669,29 @@ struct tcp_congestion_ops { struct list_head list; /* initialize private data (optional) */ - void (*init)(struct tcp_sock *tp); + void (*init)(struct sock *sk); /* cleanup private data (optional) */ - void (*release)(struct tcp_sock *tp); + void (*release)(struct sock *sk); /* return slow start threshold (required) */ - u32 (*ssthresh)(struct tcp_sock *tp); + u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct tcp_sock *tp); + u32 (*min_cwnd)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ - void (*set_state)(struct tcp_sock *tp, u8 new_state); + void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ - void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* new value of cwnd after loss (optional) */ - u32 (*undo_cwnd)(struct tcp_sock *tp); + u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked); /* get info for tcp_diag (optional) */ - void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; @@ -700,30 +700,34 @@ struct tcp_congestion_ops { extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_init_congestion_control(struct tcp_sock *tp); -extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); -extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); -extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern u32 tcp_reno_min_cwnd(struct sock *sk); extern struct tcp_congestion_ops tcp_reno; -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { - if (tp->ca_ops->set_state) - tp->ca_ops->set_state(tp, ca_state); - tp->ca_state = ca_state; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; } -static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { - if (tp->ca_ops->cwnd_event) - tp->ca_ops->cwnd_event(tp, event); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); } /* This determines how many packets are "in the network" to the best @@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. */ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) } /* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) +static inline void __tcp_enter_cwr(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + tp->undo_marker = 0; - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 026630a15ea..fe3c6d3d0c9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eed64a1991..02848e72e9c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; - tp->probes_out = 0; + icsk->icsk_probes_out = 0; tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); sk->sk_send_head = NULL; @@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(tp, name); + err = tcp_set_congestion_control(sk, name); release_sock(sk); return err; } @@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; - info->tcpi_ca_state = tp->ca_state; + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = tp->probes_out; + info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) @@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, len = min_t(unsigned int, len, TCP_CA_NAME_MAX); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, tp->ca_ops->name, len)) + if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; default: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d664..b940346de4e 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } -static void bictcp_init(struct tcp_sock *tp) +static void bictcp_init(struct sock *sk) { - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); if (initial_ssthresh) - tp->snd_ssthresh = initial_ssthresh; + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* @@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) +static inline void bictcp_low_utilization(struct sock *sk, int flag) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); u32 dist, delay; /* No time stamp */ @@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) } -static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { - struct bictcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); - bictcp_low_utilization(tp, data_acked); + bictcp_low_utilization(sk, data_acked); if (in_flight < tp->snd_cwnd) return; @@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, * behave like Reno until low_window is reached, * then increase congestion window slowly */ -static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 bictcp_recalc_ssthresh(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct tcp_sock *tp) +static u32 bictcp_undo_cwnd(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); - + const struct tcp_sock *tp = tcp_sk(sk); + const struct bictcp *ca = inet_csk_ca(sk); return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct tcp_sock *tp) +static u32 bictcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void bictcp_state(struct tcp_sock *tp, u8 new_state) +static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgement ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct tcp_sock *tp, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt) { - if (cnt > 0 && tp->ca_state == TCP_CA_Open) { - struct bictcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } @@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a778..bbf2d6624e8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. */ -void tcp_init_congestion_control(struct tcp_sock *tp) +void tcp_init_congestion_control(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (tp->ca_ops != &tcp_init_congestion_ops) + if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) return; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { - tp->ca_ops = ca; + icsk->icsk_ca_ops = ca; break; } } rcu_read_unlock(); - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } /* Manage refcounts on socket close. */ -void tcp_cleanup_congestion_control(struct tcp_sock *tp) +void tcp_cleanup_congestion_control(struct sock *sk) { - if (tp->ca_ops->release) - tp->ca_ops->release(tp); - module_put(tp->ca_ops->owner); + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + module_put(icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ @@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) } /* Change congestion control for socket */ -int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) +int tcp_set_congestion_control(struct sock *sk, const char *name) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); - if (ca == tp->ca_ops) + if (ca == icsk->icsk_ca_ops) goto out; if (!ca) @@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) err = -EBUSY; else { - tcp_cleanup_congestion_control(tp); - tp->ca_ops = ca; - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } out: rcu_read_unlock(); @@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + if (in_flight < tp->snd_cwnd) return; @@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct tcp_sock *tp) +u32 tcp_reno_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); /* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct tcp_sock *tp) +u32 tcp_reno_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5f4c74f45e8..4288ecfec9a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (ext & (1<<(TCPDIAG_CONG-1))) { - size_t len = strlen(tp->ca_ops->name); + if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), - tp->ca_ops->name); + icsk->icsk_ca_ops->name); } } r->tcpdiag_family = sk->sk_family; @@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->tcpdiag_timer = 0; r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_uid = sock_i_uid(sk); @@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (info) tcp_get_info(sk, info); - if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) - tp->ca_ops->get_info(tp, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); nlh->nlmsg_len = skb->tail - b; return skb->len; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136b..6acc04bde08 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -98,9 +98,10 @@ struct hstcp { u32 ai; }; -static void hstcp_init(struct tcp_sock *tp) +static void hstcp_init(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; @@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, u32 in_flight, int good) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, } } -static u32 hstcp_ssthresh(struct tcp_sock *tp) +static u32 hstcp_ssthresh(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); @@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf..e47b37984e9 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ca->snd_cwnd_cnt2 = 0; } -static u32 htcp_cwnd_undo(struct tcp_sock *tp) +static u32 htcp_cwnd_undo(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); ca->ccount = ca->undo_ccount; ca->maxRTT = ca->undo_maxRTT; ca->old_maxB = ca->undo_old_maxB; return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); } -static inline void measure_rtt(struct tcp_sock *tp) +static inline void measure_rtt(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 srtt = tp->srtt>>3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ @@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ca->minRTT = srtt; /* max RTT */ - if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) @@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) } } -static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 now = tcp_time_stamp; /* achieved throughput calculations */ - if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { + if (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Disorder) { ca->packetcount = 0; ca->lasttime = now; return; @@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ -static void htcp_param_update(struct tcp_sock *tp) +static void htcp_param_update(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); u32 minRTT = ca->minRTT; u32 maxRTT = ca->maxRTT; @@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; } -static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 htcp_recalc_ssthresh(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); - htcp_param_update(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct htcp *ca = inet_csk_ca(sk); + htcp_param_update(sk); return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int data_acked) { - struct htcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } else { - measure_rtt(tp); + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { @@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, } /* Lower bound on congestion window. */ -static u32 htcp_min_cwnd(struct tcp_sock *tp) +static u32 htcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void htcp_init(struct tcp_sock *tp) +static void htcp_init(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); memset(ca, 0, sizeof(struct htcp)); ca->alpha = ALPHA_BASE; ca->beta = BETA_MIN; } -static void htcp_state(struct tcp_sock *tp, u8 new_state) +static void htcp_state(struct sock *sk, u8 new_state) { switch (new_state) { case TCP_CA_CWR: case TCP_CA_Recovery: case TCP_CA_Loss: - htcp_reset(tcp_ca(tp)); + htcp_reset(inet_csk_ca(sk)); break; } } @@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); if (!use_bandwidth_switch) htcp.pkts_acked = NULL; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c30..77add63623d 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); /* This is called to refresh values for hybla parameters */ -static inline void hybla_recalc_param (struct tcp_sock *tp) +static inline void hybla_recalc_param (struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >>7; } -static void hybla_init(struct tcp_sock *tp) +static void hybla_init(struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); ca->rho = 0; ca->rho2 = 0; @@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = 65535; /* 1st Rho measurement based on initial srtt */ - hybla_recalc_param(tp); + hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ ca->minrtt = tp->srtt; tp->snd_cwnd = ca->rho; } -static void hybla_state(struct tcp_sock *tp, u8 ca_state) +static void hybla_state(struct sock *sk, u8 ca_state) { - struct hybla *ca = tcp_ca(tp); - + struct hybla *ca = inet_csk_ca(sk); ca->hybla_en = (ca_state == TCP_CA_Open); } @@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); u32 increment, odd, rho_fractions; int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ if (tp->srtt < ca->minrtt){ - hybla_recalc_param(tp); + hybla_recalc_param(sk); ca->minrtt = tp->srtt; } if (!ca->hybla_en) - return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); if (in_flight < tp->snd_cwnd) return; if (ca->rho == 0) - hybla_recalc_param(tp); + hybla_recalc_param(sk); rho_fractions = ca->rho_3ls - (ca->rho << 3); @@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71d456148de..fdd9547fb78 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -325,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk) /* 5. Recalculate window clamp after socket hit its memory bounds. */ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) { + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - inet_csk(sk)->icsk_ack.quick = 0; + icsk->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -350,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) - app_win -= inet_csk(sk)->icsk_ack.rcv_mss; + if (app_win > icsk->icsk_ack.rcv_mss) + app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -549,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) +static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's @@ -610,8 +613,8 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) tp->rtt_seq = tp->snd_nxt; } - if (tp->ca_ops->rtt_sample) - tp->ca_ops->rtt_sample(tp, *usrtt); + if (icsk->icsk_ca_ops->rtt_sample) + icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -663,9 +666,10 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); if (dst && (dst->flags&DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); int m; - if (inet_csk(sk)->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. * Reset our results. @@ -714,7 +718,7 @@ void tcp_update_metrics(struct sock *sk) tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; } else if (tp->snd_cwnd > tp->snd_ssthresh && - tp->ca_state == TCP_CA_Open) { + icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = @@ -828,8 +832,10 @@ reset: } } -static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) +static void tcp_update_reordering(struct sock *sk, const int metric, + const int ts) { + struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); @@ -844,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", - tp->rx_opt.sack_ok, tp->ca_state, + tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, tp->fackets_out, tp->sacked_out, @@ -906,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); @@ -1071,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ * we have to account for reordering! Ugly, * but should help. */ - if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { + if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { @@ -1100,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->left_out = tp->sacked_out + tp->lost_out; - if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); + if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); @@ -1118,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ void tcp_enter_frto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; tp->frto_counter = 1; - if (tp->ca_state <= TCP_CA_Disorder || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_FRTO); + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_FRTO); } /* Have to clear retransmission markers here to keep the bookkeeping @@ -1145,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk) } tcp_sync_left_out(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tp->frto_highmark = tp->snd_nxt; } @@ -1191,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); } @@ -1215,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) */ void tcp_enter_loss(struct sock *sk, int how) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_LOSS); + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_LOSS); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; @@ -1255,7 +1264,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); } @@ -1272,13 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk) */ if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - inet_csk(sk)->icsk_retransmits++; + icsk->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + icsk->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1431,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) * in assumption of absent reordering, interpret this as reordering. * The only another reason could be bug in receiver TCP. */ -static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) +static void tcp_check_reno_reordering(struct sock *sk, const int addend) { + struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1440,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + tcp_update_reordering(sk, tp->packets_out + addend, 0); } } /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct tcp_sock *tp) +static void tcp_add_reno_sack(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->sacked_out++; - tcp_check_reno_reordering(tp, 0); + tcp_check_reno_reordering(sk, 0); tcp_sync_left_out(tp); } @@ -1464,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke else tp->sacked_out -= acked-1; } - tcp_check_reno_reordering(tp, acked); + tcp_check_reno_reordering(sk, acked); tcp_sync_left_out(tp); } @@ -1538,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) } /* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct tcp_sock *tp) +static void tcp_cwnd_down(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; - if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) + if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); @@ -1579,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct tcp_sock *tp, int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->prior_ssthresh) { - if (tp->ca_ops->undo_cwnd) - tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->undo_cwnd) + tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); @@ -1611,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ - DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(tp, 1); - if (tp->ca_state == TCP_CA_Loss) + DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); + tcp_undo_cwr(sk, 1); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); else NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); @@ -1626,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) tcp_moderate_cwnd(tp); return 1; } - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 0; } @@ -1635,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) { if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, tp, "D-SACK"); - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); tp->undo_marker = 0; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); } @@ -1656,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, if (tp->retrans_out == 0) tp->retrans_stamp = 0; - tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); DBGUNDO(sk, tp, "Hoe"); - tcp_undo_cwr(tp, 0); + tcp_undo_cwr(sk, 0); NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. @@ -1682,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 1; } return 0; } -static inline void tcp_complete_cwr(struct tcp_sock *tp) +static inline void tcp_complete_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR); + tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) @@ -1708,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) tp->retrans_stamp = 0; if (flag&FLAG_ECE) - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); - if (tp->ca_state != TCP_CA_CWR) { + if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; if (tp->left_out || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; - if (tp->ca_state != state) { - tcp_set_ca_state(tp, state); + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); tp->high_seq = tp->snd_nxt; } tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); } } @@ -1741,6 +1760,7 @@ static void tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, int prior_packets, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); @@ -1764,7 +1784,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* C. Process data loss notification, provided it is valid. */ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && - tp->ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); @@ -1775,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ - if (tp->ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) BUG_TRAP(tp->retrans_out == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Loss: - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1791,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_complete_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1803,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * catching for all duplicate ACKs. */ IsReno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1812,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk, tp)) return; - tcp_complete_cwr(tp); + tcp_complete_cwr(sk); break; } } /* F. Process state. */ - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (prior_snd_una == tp->snd_una) { if (IsReno(tp) && is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) @@ -1832,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); return; } - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return; /* Loss is undone; fall through to processing in Open state. */ default: @@ -1846,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (tp->snd_una != prior_snd_una) tcp_reset_reno_sack(tp); if (is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } - if (tp->ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state == TCP_CA_Disorder) tcp_try_undo_dsack(sk, tp); if (!tcp_time_to_recover(sk, tp)) { @@ -1869,20 +1889,20 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; - if (tp->ca_state < TCP_CA_CWR) { + if (icsk->icsk_ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); } tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Recovery); + tcp_set_ca_state(sk, TCP_CA_Recovery); } if (is_dupack || tcp_head_timedout(sk, tp)) tcp_update_scoreboard(sk, tp); - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); tcp_xmit_retransmit_queue(sk); } @@ -1908,7 +1928,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) */ struct tcp_sock *tp = tcp_sk(sk); const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(tp, seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1928,7 +1948,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1945,11 +1965,12 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } -static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good) { - tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good); - tp->snd_cwnd_stamp = tcp_time_stamp; + const struct inet_connection_sock *icsk = inet_csk(sk); + icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } /* Restart timer after forward progress on connection. @@ -2098,11 +2119,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { + const struct inet_connection_sock *icsk = inet_csk(sk); tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); - if (tp->ca_ops->pkts_acked) - tp->ca_ops->pkts_acked(tp, pkts_acked); + if (icsk->icsk_ca_ops->pkts_acked) + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); } #if FASTRETRANS_DEBUG > 0 @@ -2110,19 +2132,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); if (!tp->packets_out && tp->rx_opt.sack_ok) { + const struct inet_connection_sock *icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", - tp->lost_out, tp->ca_state); + tp->lost_out, icsk->icsk_ca_state); tp->lost_out = 0; } if (tp->sacked_out) { printk(KERN_DEBUG "Leak s=%u %d\n", - tp->sacked_out, tp->ca_state); + tp->sacked_out, icsk->icsk_ca_state); tp->sacked_out = 0; } if (tp->retrans_out) { printk(KERN_DEBUG "Leak r=%u %d\n", - tp->retrans_out, tp->ca_state); + tp->retrans_out, icsk->icsk_ca_state); tp->retrans_out = 0; } } @@ -2152,16 +2175,17 @@ static void tcp_ack_probe(struct sock *sk) } } -static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag) +static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) { return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || - tp->ca_state != TCP_CA_Open); + inet_csk(sk)->icsk_ca_state != TCP_CA_Open); } -static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) +static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) { + const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1<ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); + !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); } /* Check that window update is acceptable. @@ -2251,6 +2275,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; @@ -2278,7 +2303,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; - tcp_ca_event(tp, CA_EVENT_FAST_ACK); + tcp_ca_event(sk, CA_EVENT_FAST_ACK); NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); } else { @@ -2295,7 +2320,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) flag |= FLAG_ECE; - tcp_ca_event(tp, CA_EVENT_SLOW_ACK); + tcp_ca_event(sk, CA_EVENT_SLOW_ACK); } /* We passed data and got it acked, remove any soft error @@ -2311,19 +2336,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, - tp->ca_ops->rtt_sample ? &seq_usrtt : NULL); + icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); if (tp->frto_counter) tcp_process_frto(sk, prior_snd_una); - if (tcp_ack_is_dubious(tp, flag)) { + if (tcp_ack_is_dubious(sk, flag)) { /* Advanve CWND, if state allows this. */ - if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0); + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) @@ -2332,7 +2357,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - tp->probes_out = 0; + icsk->icsk_probes_out = 0; /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3301,12 +3326,12 @@ void tcp_cwnd_application_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->ca_state == TCP_CA_Open && + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ u32 win_used = max(tp->snd_cwnd_used, 2U); if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; } tp->snd_cwnd_used = 0; @@ -3935,7 +3960,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on first data * packet. @@ -4212,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on * first data packet. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32a0ebc589d..97bbf595230 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = { */ static int tcp_v4_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_state = TCP_CLOSE; @@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) tcp_clear_xmit_timers(sk); - tcp_cleanup_congestion_control(tp); + tcp_cleanup_congestion_control(sk); /* Cleanup up the write buffer. */ sk_stream_writequeue_purge(sk); @@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dc085233d51..a88db28b0af 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newtp->ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_reno; - tcp_set_ca_state(newtp, TCP_CA_Open); + tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); newtp->rcv_wup = treq->rcv_isn + 1; @@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.dsack = 0; newtp->rx_opt.eff_sacks = 0; - newtp->probes_out = 0; newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f458eacb5ef..267b0fcbfc9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; - tcp_ca_event(tp, CA_EVENT_CWND_RESTART); + tcp_ca_event(sk, CA_EVENT_CWND_RESTART); - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) @@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (skb != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) #define SYSCTL_FLAG_SACK 0x4 /* If congestion control is doing timestamping */ - if (tp->ca_ops->rtt_sample) + if (icsk->icsk_ca_ops->rtt_sample) do_gettimeofday(&skb->stamp); sysctl_flags = 0; @@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) } if (tcp_packets_in_flight(tp) == 0) - tcp_ca_event(tp, CA_EVENT_TX_START); + tcp_ca_event(sk, CA_EVENT_TX_START); th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; @@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) if (err <= 0) return err; - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device @@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, */ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) return 0; - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return 0; in_flight = tcp_packets_in_flight(tp); @@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ void tcp_simple_retransmit(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); @@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk) * in network, but units changed and effective * cwnd/ssthresh really reduced now. */ - if (tp->ca_state != TCP_CA_Loss) { + if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->prior_ssthresh = 0; tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk); } @@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ void tcp_xmit_retransmit_queue(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int packet_cnt = tp->lost_out; @@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) return; - if (tp->ca_state != TCP_CA_Loss) + if (icsk->icsk_ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); @@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ - if (tp->ca_state != TCP_CA_Recovery) + if (icsk->icsk_ca_state != TCP_CA_Recovery) return; /* No forward retransmissions in Reno are possible. */ @@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ - tp->probes_out = 0; + icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; return; } @@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < sysctl_tcp_retries2) icsk->icsk_backoff++; - tp->probes_out++; + icsk->icsk_probes_out++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, - * do not backoff and do not remember probes_out. + * do not backoff and do not remember icsk_probes_out. * Let local senders to fight for local resources. * * Use accumulated backoff yet. */ - if (!tp->probes_out) - tp->probes_out=1; + if (!icsk->icsk_probes_out) + icsk->icsk_probes_out = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RESOURCE_PROBE_INTERVAL), diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 70e108e15c7..327770bf552 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -16,9 +16,10 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); if (in_flight < tp->snd_cwnd) return; @@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, tp->snd_cwnd_stamp = tcp_time_stamp; } -static u32 tcp_scalable_ssthresh(struct tcp_sock *tp) +static u32 tcp_scalable_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 72cec698183..415ee47ac1c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -233,11 +233,12 @@ out_unlock: static void tcp_probe_timer(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; if (tp->packets_out || !sk->sk_send_head) { - tp->probes_out = 0; + icsk->icsk_probes_out = 0; return; } @@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk) * FIXME: We ought not to do it, Solaris 2.5 actually has fixing * this behaviour in Solaris down as a bug fix. [AC] * - * Let me to explain. probes_out is zeroed by incoming ACKs + * Let me to explain. icsk_probes_out is zeroed by incoming ACKs * even if they advertise zero window. Hence, connection is killed only * if we received no ACKs for normal connection timeout. It is not killed * only because window stays zero for some time, window may be zero @@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - const struct inet_connection_sock *icsk = inet_csk(sk); const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes)) return; } - if (tp->probes_out > max_probes) { + if (icsk->icsk_probes_out > max_probes) { tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { - if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { + if (icsk->icsk_ca_state == TCP_CA_Disorder || + icsk->icsk_ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); } else { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); } - } else if (tp->ca_state == TCP_CA_Loss) { + } else if (icsk->icsk_ca_state == TCP_CA_Loss) { NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); } else { NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); @@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val) static void tcp_keepalive_timer (unsigned long data) { struct sock *sk = (struct sock *) data; + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 elapsed; @@ -490,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = tcp_time_stamp - tp->rcv_tstamp; if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { + if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || + (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; } if (tcp_write_wakeup(sk) <= 0) { - tp->probes_out++; + icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 9bd443db519..054de24efee 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -82,9 +82,10 @@ struct vegas { * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ -static inline void vegas_enable(struct tcp_sock *tp) +static inline void vegas_enable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); /* Begin taking Vegas samples next time we send something. */ vegas->doing_vegas_now = 1; @@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp) } /* Stop taking Vegas samples for now. */ -static inline void vegas_disable(struct tcp_sock *tp) +static inline void vegas_disable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->doing_vegas_now = 0; } -static void tcp_vegas_init(struct tcp_sock *tp) +static void tcp_vegas_init(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; - vegas_enable(tp); + vegas_enable(sk); } /* Do RTT sampling needed for Vegas. @@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) +static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ /* Filter to find propagation delay: */ @@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) vegas->cntRTT++; } -static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) +static void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) - vegas_enable(tp); + vegas_enable(sk); else - vegas_disable(tp); + vegas_disable(sk); } /* @@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) * packets, _then_ we can make Vegas calculations * again. */ -static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) - tcp_vegas_init(tp); + tcp_vegas_init(sk); } -static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int flag) { - struct vegas *vegas = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, * but that's not too awful, since we're taking the min, * rather than averaging. */ - tcp_vegas_rtt_calc(tp, seq_rtt*1000); + tcp_vegas_rtt_calc(sk, seq_rtt * 1000); /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, +static void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct vegas *ca = tcp_ca(tp); + const struct vegas *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct tcpvegas_info *info; @@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ef827242c94..d8a5a2b92e3 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -40,9 +40,9 @@ struct westwood { * way as soon as possible. It will reasonably happen within the first * RTT period of the connection lifetime. */ -static void tcp_westwood_init(struct tcp_sock *tp) +static void tcp_westwood_init(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); w->bk = 0; w->bw_ns_est = 0; @@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp) w->cumul_ack = 0; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_win_sx = tcp_time_stamp; - w->snd_una = tp->snd_una; + w->snd_una = tcp_sk(sk)->snd_una; } /* @@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); if (cnt > 0) - w->rtt = tp->srtt >> 3; + w->rtt = tcp_sk(sk)->srtt >> 3; } /* @@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) * It updates RTT evaluation window if it is the right moment to do * it. If so it calls filter for evaluating bandwidth. */ -static void westwood_update_window(struct tcp_sock *tp) +static void westwood_update_window(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); s32 delta = tcp_time_stamp - w->rtt_win_sx; /* @@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp) * header prediction is successful. In such case in fact update is * straight forward and doesn't need any particular care. */ -static inline void westwood_fast_bw(struct tcp_sock *tp) +static inline void westwood_fast_bw(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); - westwood_update_window(tp); + westwood_update_window(sk); w->bk += tp->snd_una - w->snd_una; w->snd_una = tp->snd_una; @@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp) * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. */ -static inline u32 westwood_acked_count(struct tcp_sock *tp) +static inline u32 westwood_acked_count(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); w->cumul_ack = tp->snd_una - w->snd_una; @@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp) return w->cumul_ack; } -static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) +static inline u32 westwood_bw_rttmin(const struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct westwood *w = inet_csk_ca(sk); return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } @@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 * so avoids ever returning 0. */ -static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) +static u32 tcp_westwood_cwnd_min(struct sock *sk) { - return westwood_bw_rttmin(tp); + return westwood_bw_rttmin(sk); } -static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) { - struct westwood *w = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); switch(event) { case CA_EVENT_FAST_ACK: - westwood_fast_bw(tp); + westwood_fast_bw(sk); break; case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_FRTO: - tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_SLOW_ACK: - westwood_update_window(tp); - w->bk += westwood_acked_count(tp); + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); w->rtt_min = min(w->rtt, w->rtt_min); break; @@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, +static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct westwood *ca = tcp_ca(tp); + const struct westwood *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct rtattr *rta; struct tcpvegas_info *info; @@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 08c55b02470..3312cb8742e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = { */ static int tcp_v6_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; tp->af_specific = &ipv6_specific; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, -- cgit v1.2.3-70-g09d2 From 505cbfc577f3fa778005e2800b869eca25727d5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:19:38 -0300 Subject: [IPV6]: Generalise the tcp_v6_lookup routines In the same way as was done with the v4 counterparts, this will be moved to inet6_hashtables.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 ++ include/net/inet6_hashtables.h | 26 ++++++++ net/ipv4/Kconfig | 3 - net/ipv4/tcp_diag.c | 40 +++++------- net/ipv6/tcp_ipv6.c | 139 ++++++++++++++++++++++------------------- 5 files changed, 122 insertions(+), 91 deletions(-) create mode 100644 include/net/inet6_hashtables.h (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 777339b6846..3c7dbc6a0a7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -193,6 +193,11 @@ struct inet6_skb_parm { #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +static inline int inet6_iif(const struct sk_buff *skb) +{ + return IP6CB(skb)->iif; +} + struct tcp6_request_sock { struct tcp_request_sock req; struct in6_addr loc_addr; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 00000000000..297c2b16967 --- /dev/null +++ b/include/net/inet6_hashtables.h @@ -0,0 +1,26 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _INET6_HASHTABLES_H +#define _INET6_HASHTABLES_H + +#include + +struct in6_addr; +struct inet_hashinfo; + +extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif); +#endif /* _INET6_HASHTABLES_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c844954c1ad..a79b4f9c10c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -425,9 +425,6 @@ config IP_TCPDIAG If unsure, say Y. -config IP_TCPDIAG_IPV6 - def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) - config IP_TCPDIAG_DCCP def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 8bf495c698f..b812191b2f5 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -24,6 +24,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -102,7 +106,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = 0; r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); @@ -121,7 +125,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_src[0] = inet->rcv_saddr; r->id.tcpdiag_dst[0] = inet->daddr; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -196,19 +200,6 @@ nlmsg_failure: return -1; } -#ifdef CONFIG_IP_TCPDIAG_IPV6 -extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif); -#else -static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) -{ - return NULL; -} -#endif - static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { int err; @@ -225,11 +216,14 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) req->id.tcpdiag_dport, req->id.tcpdiag_src[0], req->id.tcpdiag_sport, req->id.tcpdiag_if); } -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->tcpdiag_family == AF_INET6) { - sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport, - (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport, - req->id.tcpdiag_if); + sk = inet6_lookup(hashinfo, + (struct in6_addr*)req->id.tcpdiag_dst, + req->id.tcpdiag_dport, + (struct in6_addr*)req->id.tcpdiag_src, + req->id.tcpdiag_sport, + req->id.tcpdiag_if); } #endif else { @@ -440,7 +434,7 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (entry.family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -502,7 +496,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, &tcp6_rsk(req)->loc_addr); @@ -567,13 +561,13 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { entry.saddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? tcp6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? tcp6_rsk(req)->rmt_addr.s6_addr32 : #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3312cb8742e..2bc7fafe766 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,26 +76,27 @@ static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; /* I have no idea if this is a good hash for v6 or not. -DaveM */ -static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, - struct in6_addr *faddr, u16 fport) +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) { int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent>>16; hashent ^= hashent>>8; - return (hashent & (tcp_hashinfo.ehash_size - 1)); + return (hashent & (ehash_size - 1)); } -static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *laddr = &np->rcv_saddr; - struct in6_addr *faddr = &np->daddr; - __u16 lport = inet->num; - __u16 fport = inet->dport; - return tcp_v6_hashfn(laddr, lport, faddr, fport); + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); } static inline int tcp_v6_bind_conflict(const struct sock *sk, @@ -231,7 +232,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = tcp_v6_sk_hashfn(sk); + sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; write_lock(lock); @@ -258,7 +259,10 @@ static void tcp_v6_hash(struct sock *sk) } } -static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) +static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif) { struct sock *sk; struct hlist_node *node; @@ -266,8 +270,8 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; - read_lock(&tcp_hashinfo.lhash_lock); - sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) { + read_lock(&hashinfo->lhash_lock); + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -294,7 +298,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor } if (result) sock_hold(result); - read_unlock(&tcp_hashinfo.lhash_lock); + read_unlock(&hashinfo->lhash_lock); return result; } @@ -304,9 +308,13 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor * The sockhash lock must be held as a reader here. */ -static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { struct sock *sk; const struct hlist_node *node; @@ -314,8 +322,9 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { @@ -324,7 +333,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -347,34 +356,36 @@ hit: } -static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { - struct sock *sk; - - sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif); - + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); if (sk) return sk; - return tcp_v6_lookup_listener(daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); } -inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) +inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif) { struct sock *sk; local_bh_disable(); - sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); local_bh_enable(); return sk; } -EXPORT_SYMBOL_GPL(tcp_v6_lookup); +EXPORT_SYMBOL_GPL(inet6_lookup); /* @@ -454,16 +465,17 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, __u16 lport, +static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr = &np->rcv_saddr; - struct in6_addr *saddr = &np->daddr; - int dif = sk->sk_bound_dev_if; + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, + tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; const struct hlist_node *node; @@ -637,11 +649,6 @@ out: } } -static __inline__ int tcp_v6_iif(struct sk_buff *skb) -{ - return IP6CB(skb)->iif; -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -833,14 +840,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; - sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, + th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -927,7 +935,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, tcp_v6_iif(skb)); + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -1138,7 +1146,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1207,7 +1215,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1245,20 +1253,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; - struct tcphdr *th = skb->h.th; + const struct tcphdr *th = skb->h.th; struct sock *nsk; /* Find possible connection requests. */ req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr, - th->source, - &skb->nh.ipv6h->daddr, - ntohs(th->dest), - tcp_v6_iif(skb)); + nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, + th->source, &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1346,7 +1352,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = tcp_v6_iif(skb); + treq->iif = inet6_iif(skb); if (isn == 0) isn = tcp_v6_init_sequence(sk,skb); @@ -1411,7 +1417,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* @@ -1516,7 +1522,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* Clone native IPv6 options from listening socket (if any) @@ -1691,7 +1697,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo) - np->mcast_oif = tcp_v6_iif(opt_skb); + np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { @@ -1746,8 +1752,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); TCP_SKB_CB(skb)->sacked = 0; - sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source, - &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, + &skb->nh.ipv6h->daddr, ntohs(th->dest), + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1818,7 +1825,9 @@ do_time_wait: { struct sock *sk2; - sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk2 = inet6_lookup_listener(&tcp_hashinfo, + &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); -- cgit v1.2.3-70-g09d2 From 5324a040ccc708998e61ea93e669b81312f0ae11 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:26:18 -0300 Subject: [INET6_HASHTABLES]: Move inet6_lookup functions to net/ipv6/inet6_hashtables.c Doing this we allow tcp_diag to support IPV6 even if tcp_diag is compiled statically and IPV6 is compiled as a module, removing the previous restriction while not building any IPV6 code if it is not selected. Now to work on the tcpdiag_register infrastructure and then to rename the whole thing to inetdiag, reflecting its by then completely generic nature. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 106 +++++++++++++++++++++++++++- net/ipv4/Kconfig | 4 +- net/ipv6/Makefile | 2 + net/ipv6/inet6_hashtables.c | 81 ++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 154 +---------------------------------------- 5 files changed, 190 insertions(+), 157 deletions(-) create mode 100644 net/ipv6/inet6_hashtables.c (limited to 'net/ipv6') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 297c2b16967..03df3b15796 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -14,13 +14,117 @@ #ifndef _INET6_HASHTABLES_H #define _INET6_HASHTABLES_H +#include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#include #include -struct in6_addr; +#include + struct inet_hashinfo; +/* I have no idea if this is a good hash for v6 or not. -DaveM */ +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) +{ + int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); + hashent ^= hashent >> 16; + hashent ^= hashent >> 8; + return (hashent & (ehash_size - 1)); +} + +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * + * The sockhash lock must be held as a reader here. + */ +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + /* For IPV6 do the cheaper port and family tests first. */ + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + const struct inet_timewait_sock *tw = inet_twsk(sk); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk->sk_family == PF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + goto hit; + } + } + read_unlock(&head->lock); + return NULL; + +hit: + sock_hold(sk); + read_unlock(&head->lock); + return sk; +} + +extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); + if (sk) + return sk; + + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); +} + extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 dport, const int dif); +#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index a79b4f9c10c..960c02faf44 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -419,9 +419,7 @@ config IP_TCPDIAG ---help--- Support for TCP socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable - at . If you want IPv6 or DCCP - support and have selected IPv6 or DCCP as a module, you need to build - this as a module too. + at . If unsure, say Y. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5bccea2d81b..6460eec834b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -23,3 +23,5 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-y += exthdrs_core.o + +obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c new file mode 100644 index 00000000000..01d5f46d4e4 --- /dev/null +++ b/net/ipv6/inet6_hashtables.c @@ -0,0 +1,81 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic INET6 transport hashtables + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include + +#include +#include +#include + +struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + struct sock *result = NULL; + int score, hiscore = 0; + + read_lock(&hashinfo->lhash_lock); + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + continue; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score++; + } + if (score == 3) { + result = sk; + break; + } + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + } + if (result) + sock_hold(result); + read_unlock(&hashinfo->lhash_lock); + return result; +} + +EXPORT_SYMBOL_GPL(inet6_lookup_listener); + +struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif) +{ + struct sock *sk; + + local_bh_disable(); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +} + +EXPORT_SYMBOL_GPL(inet6_lookup); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2bc7fafe766..fb291b81cf6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -75,30 +76,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; -/* I have no idea if this is a good hash for v6 or not. -DaveM */ -static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const u16 fport, - const int ehash_size) -{ - int hashent = (lport ^ fport); - - hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); - hashent ^= hashent>>16; - hashent ^= hashent>>8; - return (hashent & (ehash_size - 1)); -} - -static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *laddr = &np->rcv_saddr; - const struct in6_addr *faddr = &np->daddr; - const __u16 lport = inet->num; - const __u16 fport = inet->dport; - return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); -} - static inline int tcp_v6_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { @@ -259,135 +236,6 @@ static void tcp_v6_hash(struct sock *sk) } } -static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, - const struct in6_addr *daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *sk; - struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore; - - hiscore=0; - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { - if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - if (result) - sock_hold(result); - read_unlock(&hashinfo->lhash_lock); - return result; -} - -/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so - * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM - * - * The sockhash lock must be held as a reader here. - */ - -static inline struct sock * - __inet6_lookup_established(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, - const u16 sport, - const struct in6_addr *daddr, - const u16 hnum, - const int dif) -{ - struct sock *sk; - const struct hlist_node *node; - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, - hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; - - read_lock(&head->lock); - sk_for_each(sk, node, &head->chain) { - /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ - } - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk->sk_family == PF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); - - if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) - goto hit; - } - } - read_unlock(&head->lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(&head->lock); - return sk; -} - - -static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, - const u16 sport, - const struct in6_addr *daddr, - const u16 hnum, - const int dif) -{ - struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); - if (sk) - return sk; - - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); -} - -inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, - const struct in6_addr *saddr, const u16 sport, - const struct in6_addr *daddr, const u16 dport, - const int dif) -{ - struct sock *sk; - - local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); - local_bh_enable(); - - return sk; -} - -EXPORT_SYMBOL_GPL(inet6_lookup); - - /* * Open request hash tables. */ -- cgit v1.2.3-70-g09d2 From a61bbcf28a8cb0ba56f8193d512f7222e711a294 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 17:24:31 -0700 Subject: [NET]: Store skb->timestamp as offset to a base timestamp Reduces skb size by 8 bytes on 64-bit. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 2 +- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 2 +- drivers/atm/he.c | 2 +- drivers/atm/horizon.c | 2 +- drivers/atm/idt77252.c | 8 +++---- drivers/atm/lanai.c | 2 +- drivers/atm/nicstar.c | 10 ++++----- drivers/atm/zatm.c | 2 +- include/linux/skbuff.h | 47 ++++++++++++++++++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 2 +- include/net/neighbour.h | 9 +++++++- include/net/sock.h | 13 ++++++----- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 8 +++++-- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/dev.c | 28 ++++++++++++++++-------- net/core/neighbour.c | 7 +++--- net/core/skbuff.c | 8 +++++-- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 +++- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/netfilter/ip_queue.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 8 +++---- net/ipv4/tcp_input.c | 10 ++++++--- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ndisc.c | 4 +++- net/ipv6/netfilter/ip6_queue.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- net/ipx/af_ipx.c | 4 ++-- net/netfilter/nfnetlink_log.c | 6 ++--- net/netfilter/nfnetlink_queue.c | 6 ++--- net/packet/af_packet.c | 8 +++---- net/sctp/input.c | 4 ++-- net/sctp/sm_make_chunk.c | 9 ++++---- net/sunrpc/svcsock.c | 11 ++++++---- 39 files changed, 172 insertions(+), 88 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 73c6b85299c..d74a7c5e75d 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -513,7 +513,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { // VC layer stats atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability atm_vcc->push (atm_vcc, skb); return; diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index f2f01cb82cb..57f1810fdcc 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -325,7 +325,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) result = -ENOBUFS; goto done; } - do_gettimeofday(&new_skb->stamp); + __net_timestamp(new_skb); memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); out_vcc->push(out_vcc,new_skb); atomic_inc(&vcc->stats->tx); diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 10da3693476..c13c4d736ef 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -537,7 +537,7 @@ static int rx_aal0(struct atm_vcc *vcc) return 0; } skb_put(skb,length); - skb->stamp = eni_vcc->timestamp; + skb_set_timestamp(skb, &eni_vcc->timestamp); DPRINTK("got len %ld\n",length); if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; eni_vcc->rxing++; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index b078fa548eb..58219744f5d 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) skb_put (skb, qe->p1 & 0xffff); ATM_SKB(skb)->vcc = atm_vcc; atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); atm_vcc->push (atm_vcc, skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe); diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 5f702199543..2bf723a7b6e 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1176,7 +1176,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp return -ENOMEM; } - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #ifdef FORE200E_52BYTE_AAL0_SDU if (cell_header) { diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 28250c9b32d..fde9334059a 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1886,7 +1886,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) if (rx_skb_reserve > 0) skb_reserve(skb, rx_skb_reserve); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); for (iov = he_vcc->iov_head; iov < he_vcc->iov_tail; ++iov) { diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 924a2c8988b..0cded046800 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -1034,7 +1034,7 @@ static void rx_schedule (hrz_dev * dev, int irq) { struct atm_vcc * vcc = ATM_SKB(skb)->vcc; // VC layer stats atomic_inc(&vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability vcc->push (vcc, skb); } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 30b7e990ed0..b4a76cade64 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1101,7 +1101,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) cell, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); @@ -1179,7 +1179,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1201,7 +1201,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1340,7 +1340,7 @@ idt77252_rx_raw(struct idt77252_dev *card) ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ffe3afa723b..51ec1478729 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1427,7 +1427,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr) skb_put(skb, size); vcc_rx_memcpy(skb->data, lvcc, size); ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); atomic_inc(&lvcc->rx.atmvcc->stats->rx); out: diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index a0e3bd861f1..c57e20dcb0f 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2213,7 +2213,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); skb_put(sb, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); cell += ATM_CELL_PAYLOAD; @@ -2346,7 +2346,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2373,7 +2373,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) sb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); } @@ -2398,7 +2398,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(skb->data, sb->data, NS_SMBUFSIZE); skb_put(skb, len - NS_SMBUFSIZE); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2505,7 +2505,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) #ifdef NS_USE_DESTRUCTORS hb->destructor = ns_hb_destructor; #endif /* NS_USE_DESTRUCTORS */ - do_gettimeofday(&hb->stamp); + __net_timestamp(hb); vcc->push(vcc, hb); atomic_inc(&vcc->stats->rx); } diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 85fee9530fa..c4b75ecf946 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -400,7 +400,7 @@ unsigned long *x; EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> uPD98401_AAL5_ES_SHIFT,error); skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #if 0 printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60b32151f76..32635c401d4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,13 +155,20 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) +extern struct timeval skb_tv_base; + +struct skb_timeval { + u32 off_sec; + u32 off_usec; +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @stamp: Time we arrived + * @tstamp: Time we arrived stored as offset to skb_tv_base * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -202,7 +209,7 @@ struct sk_buff { struct sk_buff *prev; struct sock *sk; - struct timeval stamp; + struct skb_timeval tstamp; struct net_device *dev; struct net_device *input_dev; @@ -1213,6 +1220,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, extern void skb_init(void); extern void skb_add_mtu(int mtu); +/** + * skb_get_timestamp - get timestamp from a skb + * @skb: skb to get stamp from + * @stamp: pointer to struct timeval to store stamp in + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts the offset back to a struct timeval and stores + * it in stamp. + */ +static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + stamp->tv_sec = skb->tstamp.off_sec; + stamp->tv_usec = skb->tstamp.off_usec; + if (skb->tstamp.off_sec) { + stamp->tv_sec += skb_tv_base.tv_sec; + stamp->tv_usec += skb_tv_base.tv_usec; + } +} + +/** + * skb_set_timestamp - set timestamp of a skb + * @skb: skb to set stamp of + * @stamp: pointer to struct timeval to get stamp from + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts a struct timeval to an offset and stores + * it in the skb. + */ +static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; + skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; +} + +extern void __net_timestamp(struct sk_buff *skb); + #ifdef CONFIG_NETFILTER static inline void nf_conntrack_put(struct nf_conntrack *nfct) { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731..7f933f30207 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) bt_cb(skb)->incoming = 1; /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5a..34c07731933 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, return neigh_create(tbl, pkey, dev); } -#define LOCALLY_ENQUEUED -2 +struct neighbour_cb { + unsigned long sched_next; + unsigned int flags; +}; + +#define LOCALLY_ENQUEUED 0x1 + +#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index 065df67b642..d5942887707 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1282,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval *stamp = &skb->stamp; + struct timeval stamp; + + skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (stamp->tv_sec == 0) - do_gettimeofday(stamp); + if (stamp.tv_sec == 0) + do_gettimeofday(&stamp); + skb_set_timestamp(skb, &stamp); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - stamp); + &stamp); } else - sk->sk_stamp = *stamp; + sk->sk_stamp = stamp; } /** diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f9e11b533a..55dc42eac92 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -999,7 +999,7 @@ static int hci_send_frame(struct sk_buff *skb) if (atomic_read(&hdev->promisc)) { /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); hci_send_to_sock(hdev, skb); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 40b219560bb..d6da0939216 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1087,7 +1087,7 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index eed9090d77f..32ef7975a13 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -332,8 +332,12 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } - if (mask & HCI_CMSG_TSTAMP) - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); + if (mask & HCI_CMSG_TSTAMP) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + } } static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 561d75c8ed5..acb888d3258 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, pm->version = EBT_ULOG_VERSION; do_gettimeofday(&pm->stamp); if (ub->qlen == 1) - ub->skb->stamp = pm->stamp; + skb_set_timestamp(ub->skb, &pm->stamp); pm->data_len = copy_len; pm->mark = skb->nfmark; pm->hook = hooknr; diff --git a/net/core/dev.c b/net/core/dev.c index 9d153eb1e8c..a3ed53cc4af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1009,13 +1009,22 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -static inline void net_timestamp(struct timeval *stamp) +void __net_timestamp(struct sk_buff *skb) +{ + struct timeval tv; + + do_gettimeofday(&tv); + skb_set_timestamp(skb, &tv); +} +EXPORT_SYMBOL(__net_timestamp); + +static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) - do_gettimeofday(stamp); + __net_timestamp(skb); else { - stamp->tv_sec = 0; - stamp->tv_usec = 0; + skb->tstamp.off_sec = 0; + skb->tstamp.off_usec = 0; } } @@ -1027,7 +1036,8 @@ static inline void net_timestamp(struct timeval *stamp) void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - net_timestamp(&skb->stamp); + + net_timestamp(skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1379,8 +1389,8 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); /* * The code is rearranged so that the path is the most @@ -1566,8 +1576,8 @@ int netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); if (!skb->input_dev) skb->input_dev = skb->dev; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1beb782ac41..72ee00f7b30 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg) while (skb != (struct sk_buff *)&tbl->proxy_queue) { struct sk_buff *back = skb; - long tdif = back->stamp.tv_usec - now; + long tdif = NEIGH_CB(back)->sched_next - now; skb = skb->next; if (tdif <= 0) { @@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, kfree_skb(skb); return; } - skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = sched_next; + + NEIGH_CB(skb)->sched_next = sched_next; + NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef498cb9f78..39a161dbc16 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,6 +70,8 @@ static kmem_cache_t *skbuff_head_cache; +struct timeval __read_mostly skb_tv_base; + /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -331,7 +333,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->next = n->prev = NULL; n->sk = NULL; - C(stamp); + C(tstamp); C(dev); C(h); C(nh); @@ -408,7 +410,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; new->pkt_type = old->pkt_type; - new->stamp = old->stamp; + new->tstamp = old->tstamp; new->destructor = NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; @@ -1645,6 +1647,7 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1678,3 +1681,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_tv_base); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8f063990555..4a62093eb34 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_toiovec(msg->msg_iov, skb->data, copied); if (err) goto out_free; - sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &sk->sk_stamp); if (msg->msg_name) memcpy(msg->msg_name, skb->cb, msg->msg_namelen); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6eb9c549d64..8bf312bdea1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb) if (n) neigh_release(n); - if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || + if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || in_dev->arp_parms->proxy_delay == 0) { arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); @@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out_of_mem; + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); freeskb: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1ac64c0c5b3..9e6e683cc34 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (skb->dev) qp->iif = skb->dev->ifindex; skb->dev = NULL; - qp->stamp = skb->stamp; + skb_get_timestamp(skb, &qp->stamp); qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->next = NULL; head->dev = dev; - head->stamp = qp->stamp; + skb_set_timestamp(head, &qp->stamp); iph = head->nh.iph; iph->frag_off = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 1c49833e00a..7f2bcc7198f 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b86f06ec976..1d8ac4595e1 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum, pm = NLMSG_DATA(nlh); /* We might not have a timestamp, get one */ - if (skb->stamp.tv_sec == 0) - do_gettimeofday((struct timeval *)&skb->stamp); + if (skb->tstamp.off_sec == 0) + __net_timestamp((struct sk_buff *)skb); /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb->stamp.tv_sec; - pm->timestamp_usec = skb->stamp.tv_usec; + pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd9547fb78..ebb8654e3de 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2097,9 +2097,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if (seq_usrtt) - *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000 - + (usnow.tv_usec - skb->stamp.tv_usec); + if (seq_usrtt) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 + + (usnow.tv_usec - tv.tv_usec); + } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 267b0fcbfc9..8d92ab562ae 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -282,7 +282,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* If congestion control is doing timestamping */ if (icsk->icsk_ca_ops->rtt_sample) - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { @@ -483,7 +483,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned * skbs, which it never sent before. --ANK */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; - buff->stamp = skb->stamp; + buff->tstamp = skb->tstamp; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= tcp_skb_pcount(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ae72d4c9bd..a7eae30f455 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { - if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && + if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && idev->nd_parms->proxy_delay != 0) { @@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: ndisc_recv_ns(skb); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7ecb91e24a3..446764545b1 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 59e7c631787..9d9e04344c7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (skb->dev) fq->iif = skb->dev->ifindex; skb->dev = NULL; - fq->stamp = skb->stamp; + skb_get_timestamp(skb, &fq->stamp); fq->meat += skb->len; atomic_add(skb->truesize, &ip6_frag_mem); @@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->next = NULL; head->dev = dev; - head->stamp = fq->stamp; + skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); *skb_in = head; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 39d5939ccd6..c54f8acc97e 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, copied); if (rc) goto out_free; - if (skb->stamp.tv_sec) - sk->sk_stamp = skb->stamp; + if (skb->tstamp.off_sec) + skb_get_timestamp(skb, &sk->sk_stamp) msg->msg_namelen = sizeof(*sipx); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 464c9fa2934..ff5601ceedc 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -491,11 +491,11 @@ __build_packet_message(struct nfulnl_instance *inst, NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); } - if (skb->stamp.tv_sec) { + if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb->stamp.tv_sec); - ts.usec = cpu_to_be64(skb->stamp.tv_usec); + ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 741686ff71d..e3a5285329a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -494,11 +494,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } - if (entry->skb->stamp.tv_sec) { + if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = htonll(entry->skb->stamp.tv_sec); - ts.usec = htonll(entry->skb->stamp.tv_usec); + ts.sec = htonll(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); + ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index deb5f6f7f85..ba997095f08 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -635,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb->stamp.tv_sec; - h->tp_usec = skb->stamp.tv_usec; + h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index 742be9171b7..28f32243397 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb) } /* SCTP seems to always need a timestamp right now (FIXME) */ - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 00d32b7c826..3868a8d70cc 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1362,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( char *key; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; + struct timeval tv; headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; bodysize = ntohs(chunk->chunk_hdr->length) - headersize; @@ -1434,7 +1435,8 @@ no_hmac: * an association, there is no need to check cookie's expiration * for init collision case of lost COOKIE ACK. */ - if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) { + skb_get_timestamp(skb, &tv); + if (!asoc && tv_lt(bear_cookie->expiration, tv)) { __u16 len; /* * Section 3.3.10.3 Stale Cookie Error (3) @@ -1447,10 +1449,9 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (skb->stamp.tv_sec - + suseconds_t usecs = (tv.tv_sec - bear_cookie->expiration.tv_sec) * 1000000L + - skb->stamp.tv_usec - - bear_cookie->expiration.tv_usec; + tv.tv_usec - bear_cookie->expiration.tv_usec; usecs = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 199d3747bd4..05fe2e73553 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } - if (skb->stamp.tv_sec == 0) { - skb->stamp.tv_sec = xtime.tv_sec; - skb->stamp.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; + if (skb->tstamp.off_sec == 0) { + struct timeval tv; + + tv.tv_sec = xtime.tv_sec; + tv.tv_usec = xtime.tv_nsec * 1000; + skb_set_timestamp(skb, &tv); /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* -- cgit v1.2.3-70-g09d2 From 34b4a4a624bafe089107966a6c56d2a1aca026d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 14 Aug 2005 17:33:59 -0700 Subject: [NETFILTER]: Remove tasklist_lock abuse in ipt{,6}owner Rip out cmd/sid/pid matching since its unfixable broken and stands in the way of locking changes to tasklist_lock. Signed-off-by: Christoph Hellwig Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_owner.c | 132 +++------------------------------------- net/ipv6/netfilter/ip6t_owner.c | 90 +++------------------------ 2 files changed, 14 insertions(+), 208 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 3b9065e0638..c1889f88262 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -20,106 +20,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables owner match"); -static int -match_comm(const struct sk_buff *skb, const char *comm) -{ - struct task_struct *g, *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - if(strncmp(p->comm, comm, sizeof(p->comm))) - continue; - - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_pid(const struct sk_buff *skb, pid_t pid) -{ - struct task_struct *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - if (!p) - goto out; - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); -out: - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_sid(const struct sk_buff *skb, pid_t sid) -{ - struct task_struct *g, *p; - struct file *file = skb->sk->sk_socket->file; - int i, found=0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - struct files_struct *files; - if (p->signal->session != sid) - continue; - - task_lock(p); - files = p->files; - if (files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == file) { - found = 1; - break; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - if (found) - goto out; - } while_each_thread(g, p); -out: - read_unlock(&tasklist_lock); - - return found; -} - static int match(const struct sk_buff *skb, const struct net_device *in, @@ -145,24 +45,6 @@ match(const struct sk_buff *skb, return 0; } - if(info->match & IPT_OWNER_PID) { - if (!match_pid(skb, info->pid) ^ - !!(info->invert & IPT_OWNER_PID)) - return 0; - } - - if(info->match & IPT_OWNER_SID) { - if (!match_sid(skb, info->sid) ^ - !!(info->invert & IPT_OWNER_SID)) - return 0; - } - - if(info->match & IPT_OWNER_COMM) { - if (!match_comm(skb, info->comm) ^ - !!(info->invert & IPT_OWNER_COMM)) - return 0; - } - return 1; } @@ -173,6 +55,8 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + const struct ipt_owner_info *info = matchinfo; + if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); @@ -184,15 +68,13 @@ checkentry(const char *tablename, IPT_ALIGN(sizeof(struct ipt_owner_info))); return 0; } -#ifdef CONFIG_SMP - /* files->file_lock can not be used in a BH */ - if (((struct ipt_owner_info *)matchinfo)->match - & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { - printk("ipt_owner: pid, sid and command matching is broken " - "on SMP.\n"); + + if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { + printk("ipt_owner: pid, sid and command matching " + "not supported anymore\n"); return 0; } -#endif + return 1; } diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index ab0e32d3de4..9b91decbfdd 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -20,71 +20,6 @@ MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("IP6 tables owner matching module"); MODULE_LICENSE("GPL"); -static int -match_pid(const struct sk_buff *skb, pid_t pid) -{ - struct task_struct *p; - struct files_struct *files; - int i; - - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - if (!p) - goto out; - task_lock(p); - files = p->files; - if(files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); - task_unlock(p); - read_unlock(&tasklist_lock); - return 1; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); -out: - read_unlock(&tasklist_lock); - return 0; -} - -static int -match_sid(const struct sk_buff *skb, pid_t sid) -{ - struct task_struct *g, *p; - struct file *file = skb->sk->sk_socket->file; - int i, found=0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - struct files_struct *files; - if (p->signal->session != sid) - continue; - - task_lock(p); - files = p->files; - if (files) { - spin_lock(&files->file_lock); - for (i=0; i < files->max_fds; i++) { - if (fcheck_files(files, i) == file) { - found = 1; - break; - } - } - spin_unlock(&files->file_lock); - } - task_unlock(p); - if (found) - goto out; - } while_each_thread(g, p); -out: - read_unlock(&tasklist_lock); - - return found; -} static int match(const struct sk_buff *skb, @@ -112,18 +47,6 @@ match(const struct sk_buff *skb, return 0; } - if(info->match & IP6T_OWNER_PID) { - if (!match_pid(skb, info->pid) ^ - !!(info->invert & IP6T_OWNER_PID)) - return 0; - } - - if(info->match & IP6T_OWNER_SID) { - if (!match_sid(skb, info->sid) ^ - !!(info->invert & IP6T_OWNER_SID)) - return 0; - } - return 1; } @@ -134,6 +57,8 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + const struct ip6t_owner_info *info = matchinfo; + if (hook_mask & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); @@ -142,14 +67,13 @@ checkentry(const char *tablename, if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) return 0; -#ifdef CONFIG_SMP - /* files->file_lock can not be used in a BH */ - if (((struct ip6t_owner_info *)matchinfo)->match - & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { - printk("ip6t_owner: pid and sid matching is broken on SMP.\n"); + + if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { + printk("ipt_owner: pid and sid matching " + "not supported anymore\n"); return 0; } -#endif + return 1; } -- cgit v1.2.3-70-g09d2 From ac6d439d2097b72ea0cbc2322ce1263a38bc1fd0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:29:52 -0700 Subject: [NETLINK]: Convert netlink users to use group numbers instead of bitmasks Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/w1/w1_netlink.c | 2 +- include/linux/netfilter/nfnetlink.h | 23 ++++++++++++++++- include/linux/netfilter_decnet.h | 14 +++++++++++ include/linux/rtnetlink.h | 42 ++++++++++++++++++++++++++++--- include/linux/selinux_netlink.h | 13 +++++++++- include/linux/xfrm.h | 18 +++++++++++++ lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 4 +-- net/core/neighbour.c | 8 +++--- net/core/rtnetlink.c | 6 ++--- net/core/wireless.c | 4 +-- net/decnet/dn_dev.c | 8 +++--- net/decnet/dn_table.c | 4 +-- net/decnet/netfilter/dn_rtmsg.c | 6 ++--- net/ipv4/devinet.c | 7 +++--- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 4 +-- net/ipv4/netfilter/ip_conntrack_netlink.c | 12 ++++----- net/ipv4/netfilter/ipt_ULOG.c | 8 +++--- net/ipv6/addrconf.c | 24 +++++++++--------- net/ipv6/route.c | 8 +++--- net/netfilter/nfnetlink.c | 2 +- net/sched/act_api.c | 8 +++--- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 4 +-- net/xfrm/xfrm_user.c | 35 +++++++++++--------------- security/selinux/netlink.c | 4 +-- 27 files changed, 183 insertions(+), 91 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 2a82fb055c7..e7b774423dd 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -51,7 +51,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg) memcpy(data, msg, sizeof(struct w1_netlink_msg)); - NETLINK_CB(skb).dst_groups = dev->groups; + NETLINK_CB(skb).dst_group = dev->groups; netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC); nlmsg_failure: diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b0feb237407..1d5b10ae239 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -2,13 +2,34 @@ #define _NFNETLINK_H #include -/* nfnetlink groups: Up to 32 maximum */ +#ifndef __KERNEL__ +/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */ #define NF_NETLINK_CONNTRACK_NEW 0x00000001 #define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 +#endif + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) /* Generic structure for encapsulation optional netfilter information. * It is reminiscent of sockaddr, but with sa_family replaced diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 01897948415..6f425369ee2 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -56,7 +56,21 @@ struct nf_dn_rtmsg { #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define DNRMG_L1_GROUP 0x01 #define DNRMG_L2_GROUP 0x02 +#endif + +enum { + DNRNG_NLGRP_NONE, +#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE + DNRNG_NLGRP_L1, +#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 + DNRNG_NLGRP_L2, +#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 + __DNRNG_NLGRP_MAX +}; +#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) #endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 657c05ab8f9..c231e9a08f0 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -826,9 +826,8 @@ enum #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) - -/* RTnetlink multicast groups */ - +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 @@ -847,6 +846,43 @@ enum #define RTMGRP_DECnet_ROUTE 0x4000 #define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* TC action piece */ struct tcamsg diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h index 957e6ebca4e..bbf489decd8 100644 --- a/include/linux/selinux_netlink.h +++ b/include/linux/selinux_netlink.h @@ -20,10 +20,21 @@ enum { SELNL_MSG_MAX }; -/* Multicast groups */ +#ifndef __KERNEL__ +/* Multicast groups - backwards compatiblility for userspace */ #define SELNL_GRP_NONE 0x00000000 #define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ #define SELNL_GRP_ALL 0xffffffff +#endif + +enum selinux_nlgroups { + SELNLGRP_NONE, +#define SELNLGRP_NONE SELNLGRP_NONE + SELNLGRP_AVC, +#define SELNLGRP_AVC SELNLGRP_AVC + __SELNLGRP_MAX +}; +#define SELNLGRP_MAX (__SELNLGRP_MAX - 1) /* Message structures */ struct selnl_msg_setenforce { diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0d423300d8..0fb077d6844 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -258,9 +258,27 @@ struct xfrm_usersa_flush { __u8 proto; }; +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#endif + +enum xfrm_nlgroups { + XFRMNLGRP_NONE, +#define XFRMNLGRP_NONE XFRMNLGRP_NONE + XFRMNLGRP_ACQUIRE, +#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE + XFRMNLGRP_EXPIRE, +#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE + XFRMNLGRP_SA, +#define XFRMNLGRP_SA XFRMNLGRP_SA + XFRMNLGRP_POLICY, +#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY + __XFRMNLGRP_MAX +}; +#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) #endif /* _LINUX_XFRM_H */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index bc000619f4f..1ebd735d643 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -93,7 +93,7 @@ static int send_uevent(const char *signal, const char *obj, } } - NETLINK_CB(skb).dst_groups = 1; + NETLINK_CB(skb).dst_group = 1; return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index acb888d3258..6845b5dd6d7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; - netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroup + 1; + netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 72ee00f7b30..39fc55edf69 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2343,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n) } nlh = (struct nlmsghdr *)skb->data; nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) @@ -2361,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n) return; } nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9b3c61f1a37..5f3f95b5585 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); @@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); } static int rtnetlink_done(struct netlink_callback *cb) diff --git a/net/core/wireless.c b/net/core/wireless.c index 3ff5639c0b7..19fa6a5389b 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1144,8 +1144,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); } #endif /* WE_EVENT_NETLINK */ diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 00233ecbc9c..5610bb16dbf 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) skb = alloc_skb(size, GFP_KERNEL); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); return; } if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 28ba5777a25..73a88489ff3 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; if (nlh->nlmsg_flags & NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); if (nlh->nlmsg_flags & NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 3068fddb2da..353fed6888f 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -71,10 +71,10 @@ static void dnrmg_send_peer(struct sk_buff *skb) switch(flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_L1RT: - group = DNRMG_L1_GROUP; + group = DNRMG_L1_NLGRP; break; case DN_RT_PKT_L2RT: - group = DNRMG_L2_GROUP; + group = DNRMG_L2_NLGRP; break; default: return; @@ -83,7 +83,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) skb2 = dnrmg_build_message(skb, &status); if (skb2 == NULL) return; - NETLINK_CB(skb2).dst_groups = group; + NETLINK_CB(skb2).dst_group = group; netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d8a10e3dd77..ba2895ae815 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); if (!skb) - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); } else { - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 75d03e37b9a..d4e7b578a25 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -560,7 +560,7 @@ static void nl_fib_input(struct sock *sk, int len) pid = nlh->nlmsg_pid; /*pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_pid = pid; - NETLINK_CB(skb).dst_groups = 0; /* unicast */ + NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e278cb9d007..7e4651b3caa 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; if (n->nlmsg_flags&NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); if (n->nlmsg_flags&NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 1221a9c8bac..a4e9278db4e 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -297,7 +297,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct sk_buff *skb; unsigned int type; unsigned char *b; - unsigned int flags = 0, groups; + unsigned int flags = 0, group; /* ignore our fake conntrack entry */ if (ct == &ip_conntrack_untracked) @@ -305,7 +305,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; - groups = NF_NETLINK_CONNTRACK_DESTROY; + group = NFNLGRP_CONNTRACK_DESTROY; goto alloc_skb; } if (events & (IPCT_NEW | IPCT_RELATED)) { @@ -313,7 +313,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, flags = NLM_F_CREATE|NLM_F_EXCL; /* dump everything */ events = ~0UL; - groups = NF_NETLINK_CONNTRACK_NEW; + group = NFNLGRP_CONNTRACK_NEW; goto alloc_skb; } if (events & (IPCT_STATUS | @@ -322,7 +322,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, IPCT_HELPINFO | IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; - groups = NF_NETLINK_CONNTRACK_UPDATE; + group = NFNLGRP_CONNTRACK_UPDATE; goto alloc_skb; } @@ -375,7 +375,7 @@ alloc_skb: goto nfattr_failure; nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, groups, 0); + nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; nlmsg_failure: @@ -1194,7 +1194,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nlh->nlmsg_len = skb->tail - b; proto = exp->tuple.dst.protonum; - nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; nlmsg_failure: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1d8ac4595e1..89816b83455 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -116,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); - DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", - ub->qlen, nlgroupnum); - netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; + DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); + netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b9c3da34949..493abf94bcf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); return; } if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); return; } if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, @@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); return; } if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 878789b3122..6ea494ab4e0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, skb = alloc_skb(size, gfp_any()); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); return; } if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); } /* diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 84efffdbade..36a4c5fbb7d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -198,7 +198,7 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(nfnl, skb, pid, group, allocation); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c896a0118a3..8aebe8f6d27 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -593,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -656,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, pid, RTMGRP_TC, + ret = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -698,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, x->rta_len = skb->tail - (u8*)x; nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = RTMGRP_TC; + NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3b5714ef4d1..b4d89fbb378 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b9a069af4a0..737681cb9a9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -816,7 +816,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4d553a1d216..0579d209af2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -1152,9 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -1228,9 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1308,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -1409,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -1459,9 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1486,9 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 0f7be652455..20f481015db 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -80,8 +80,8 @@ static void selnl_notify(int msgtype, void *data) nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); selnl_add_payload(nlh, len, msgtype, data); nlh->nlmsg_len = skb->tail - tmp; - NETLINK_CB(skb).dst_groups = SELNL_GRP_AVC; - netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER); + NETLINK_CB(skb).dst_group = SELNLGRP_AVC; + netlink_broadcast(selnl, skb, 0, SELNLGRP_AVC, GFP_USER); out: return; -- cgit v1.2.3-70-g09d2 From 066286071d3542243baa68166acb779187c848b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:33:26 -0700 Subject: [NETLINK]: Add "groups" argument to netlink_kernel_create Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/w1/w1_int.c | 2 +- include/linux/netlink.h | 2 +- kernel/audit.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 3 ++- net/core/rtnetlink.c | 3 ++- net/decnet/netfilter/dn_rtmsg.c | 4 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 4 ++-- net/netlink/af_netlink.c | 6 ++++-- net/xfrm/xfrm_user.c | 4 ++-- security/selinux/netlink.c | 3 ++- 16 files changed, 27 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index f3f339d057f..498ad505fa5 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 1; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); + dev->nls = netlink_kernel_create(NETLINK_W1, 1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 36a40449f9f..7d1d9683b24 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -125,7 +125,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index ed4019563d5..7f0699790d4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,7 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 1ebd735d643..04ca4429ddf 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, THIS_MODULE); if (!uevent_sock) { diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 6845b5dd6d7..aae26ae2e61 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,8 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, + NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5f3f95b5585..9bed7569ce3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); + rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, + THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 353fed6888f..afb33a25ea5 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,8 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, - THIS_MODULE); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_sk, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d4e7b578a25..4e1379f7126 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -566,7 +566,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); + netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1880ad8575d..71f3c7350c6 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -845,7 +845,7 @@ static int __init inet_diag_init(void) goto out; memset(inet_diag_table, 0, inet_diag_table_size); - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 7f2bcc7198f..d54f14d926f 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -671,7 +671,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 89816b83455..e2c14f3cb2f 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -388,7 +388,8 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 446764545b1..aa11cf366ef 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -667,7 +667,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 36a4c5fbb7d..e089f17bb80 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -355,8 +355,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, - THIS_MODULE); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47e79173801..e259f46e26f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,7 +1204,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) +netlink_kernel_create(int unit, unsigned int groups, + void (*input)(struct sock *sk, int len), + struct module *module) { struct socket *sock; struct sock *sk; @@ -1234,7 +1236,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = 32; + nl_table[unit].groups = groups < 32 ? 32 : groups; nl_table[unit].module = module; nl_table[unit].registered = 1; netlink_table_ungrab(); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0579d209af2..c35336a0f71 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1520,8 +1520,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, - THIS_MODULE); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + xfrm_netlink_rcv, THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 20f481015db..e203883406d 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -104,7 +104,8 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); + selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, + THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); -- cgit v1.2.3-70-g09d2 From 20380731bc2897f2952ae055420972ded4cd786e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 02:18:02 -0300 Subject: [NET]: Fix sparse warnings Of this type, mostly: CHECK net/ipv6/netfilter.c net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static? net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static? Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 ++ include/linux/if_frad.h | 6 ++++-- include/linux/if_tr.h | 4 ++++ include/linux/igmp.h | 3 +++ include/linux/net.h | 7 +++++++ include/linux/netdevice.h | 10 ++++++++++ include/linux/netfilter_ipv6.h | 4 ++-- include/linux/security.h | 6 ++++-- include/linux/skbuff.h | 2 ++ include/linux/socket.h | 7 +++++++ include/net/addrconf.h | 6 ++++++ include/net/af_unix.h | 15 ++++++++++++++ include/net/icmp.h | 7 +++++++ include/net/ip.h | 23 ++++++++++++++++++++++ include/net/ip_fib.h | 5 +++++ include/net/ipv6.h | 35 +++++++++++++++++++++++++++++++-- include/net/p8022.h | 2 ++ include/net/raw.h | 7 ++++++- include/net/route.h | 2 ++ include/net/sock.h | 12 ++++++++++++ include/net/tcp.h | 12 ++++++++++++ include/net/udp.h | 5 +++++ init/main.c | 2 +- kernel/sysctl.c | 4 +--- net/802/p8023.c | 1 + net/802/sysctl_net_802.c | 3 ++- net/core/dev.c | 6 ------ net/core/sysctl_net_core.c | 9 ++------- net/core/utils.c | 2 ++ net/core/wireless.c | 4 ---- net/ethernet/eth.c | 2 -- net/ethernet/sysctl_net_ether.c | 1 + net/ipv4/af_inet.c | 14 -------------- net/ipv4/datagram.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/ip_sockglue.c | 2 -- net/ipv4/proc.c | 3 --- net/ipv4/syncookies.c | 2 -- net/ipv4/sysctl_net_ipv4.c | 43 +++++++---------------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/af_inet6.c | 24 ----------------------- net/ipv6/ipv6_sockglue.c | 8 -------- net/ipv6/route.c | 6 ++---- net/ipv6/sit.c | 2 +- net/ipv6/sysctl_net_ipv6.c | 3 --- net/ipv6/tcp_ipv6.c | 4 ---- net/ipv6/udp.c | 2 -- net/ipx/af_ipx.c | 2 -- net/socket.c | 11 +++++------ net/sysctl_net.c | 8 +++----- net/unix/af_unix.c | 8 -------- net/unix/sysctl_net_unix.c | 2 +- 54 files changed, 208 insertions(+), 162 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index b5b58e9c054..fc2d4c8225a 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb->mac.raw; } + +extern struct ctl_table ether_table[]; #endif #endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 3c94b173657..511999c7eed 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -191,10 +191,12 @@ struct frad_local int buffer; /* current buffer for S508 firmware */ }; -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - #endif /* __KERNEL__ */ #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ +#ifdef __KERNEL__ +extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); +#endif + #endif diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 3fba9e2f542..5502f597cf0 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -43,12 +43,16 @@ struct trh_hdr { }; #ifdef __KERNEL__ +#include #include static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) { return (struct trh_hdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL +extern struct ctl_table tr_table[]; +#endif #endif /* This is an Token-Ring LLC structure */ diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 0c31ef0b5ba..28f4f3b3695 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -129,6 +129,9 @@ struct igmpv3_query { #include #include +extern int sysctl_igmp_max_memberships; +extern int sysctl_igmp_max_msf; + struct ip_sf_socklist { unsigned int sl_max; diff --git a/include/linux/net.h b/include/linux/net.h index 5f8b632ff65..4e981585a89 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -286,5 +286,12 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) +#ifdef CONFIG_SYSCTL +#include +extern ctl_table net_table[]; +extern int net_msg_cost; +extern int net_msg_burst; +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8e52edfd52..1fcaa88b862 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -244,6 +244,7 @@ struct netdev_boot_setup { }; #define NETDEV_BOOT_SETUP_MAX 8 +extern int __init netdev_boot_setup(char *str); /* * The DEVICE structure. @@ -673,6 +674,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern void dev_init(void); extern int netdev_nit; +extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); @@ -908,6 +910,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); +#ifdef CONFIG_PROC_FS +extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); +extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); +extern void dev_seq_stop(struct seq_file *seq, void *v); +#endif + +extern void linkwatch_run_queue(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 5d204ee7a31..edcc2c6eb5c 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,7 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +extern int ipv6_netfilter_init(void); +extern void ipv6_netfilter_fini(void); #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1..7aab6ab7c57 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32635c401d4..db10335e419 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern void skb_release_data(struct sk_buff *skb); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/linux/socket.h b/include/linux/socket.h index acc55aac8a4..1739c2d5b95 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ +extern int sysctl_somaxconn; +extern void sock_init(void); +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + typedef unsigned short sa_family_t; /* diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed9367217..750e2508dd9 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -45,6 +45,7 @@ struct prefix_info { #ifdef __KERNEL__ +#include #include #include #include @@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) addr->s6_addr32[3] == htonl(0x00000002)); } +#ifdef CONFIG_PROC_FS +extern int if6_proc_init(void); +extern void if6_proc_exit(void); +#endif + #endif #endif diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b3846b9d..b5d785ab4a0 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,5 +1,11 @@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include +#include +#include +#include + extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -74,5 +80,14 @@ struct unix_sock { wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) + +#ifdef CONFIG_SYSCTL +extern int sysctl_unix_max_dgram_qlen; +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); +#else +static inline void unix_sysctl_register(void) {} +static inline void unix_sysctl_unregister(void) {} +#endif #endif #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb4..6cdebeee5f9 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; +extern int sysctl_icmp_ratelimit; +extern int sysctl_icmp_ratemask; + #endif /* _ICMP_H */ diff --git a/include/net/ip.h b/include/net/ip.h index c16fb6ac344..7623e414a5f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,24 @@ extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +/* From ip_fragment.c */ +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; + +/* From inetpeer.c */ +extern int inet_peer_threshold; +extern int inet_peer_minttl; +extern int inet_peer_maxttl; +extern int inet_peer_gc_mintime; +extern int inet_peer_gc_maxtime; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +extern void ipfrag_init(void); + #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ @@ -348,5 +366,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context); +#ifdef CONFIG_PROC_FS +extern int ip_misc_proc_init(void); +#endif + +extern struct ctl_table ipv4_table[]; #endif /* _IP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac..14de4ebd121 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) #endif } +#ifdef CONFIG_PROC_FS +extern int fib_proc_init(void); +extern void fib_proc_exit(void); +#endif + #endif /* _NET_FIB_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c5a02ddc594..3203eaff4bd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -104,6 +104,7 @@ struct frag_hdr { #ifdef __KERNEL__ +#include #include /* sysctls */ @@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -#endif /* __KERNEL__ */ -#endif /* _NET_IPV6_H */ +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +extern int ip6_mc_source(int add, int omode, struct sock *sk, + struct group_source_req *pgsr); +extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, + struct group_filter __user *optval, + int __user *optlen); + +#ifdef CONFIG_PROC_FS +extern int ac6_proc_init(void); +extern void ac6_proc_exit(void); +extern int raw6_proc_init(void); +extern void raw6_proc_exit(void); +extern int tcp6_proc_init(void); +extern void tcp6_proc_exit(void); +extern int udp6_proc_init(void); +extern void udp6_proc_exit(void); +extern int ipv6_misc_proc_init(void); +extern void ipv6_misc_proc_exit(void); + +extern struct rt6_statistics rt6_stats; +#endif +#ifdef CONFIG_SYSCTL +extern ctl_table ipv6_route_table[]; +extern ctl_table ipv6_icmp_table[]; +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif +#endif /* __KERNEL__ */ +#endif /* _NET_IPV6_H */ diff --git a/include/net/p8022.h b/include/net/p8022.h index 223f8fa9ffc..42e9fac51b3 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -8,4 +8,6 @@ extern struct datalink_proto * struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_8023_client(struct datalink_proto *dl); #endif diff --git a/include/net/raw.h b/include/net/raw.h index 1c4bc3e6809..f47917469b1 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,10 +17,10 @@ #ifndef _RAW_H #define _RAW_H +#include extern struct proto raw_prot; - extern void raw_err(struct sock *, struct sk_buff *, u32 info); extern int raw_rcv(struct sock *, struct sk_buff *); @@ -39,4 +39,9 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +#ifdef CONFIG_PROC_FS +extern int raw_proc_init(void); +extern void raw_proc_exit(void); +#endif + #endif /* _RAW_H */ diff --git a/include/net/route.h b/include/net/route.h index 63c94558236..dbe79ca67d3 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -195,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +extern ctl_table ipv4_route_table[]; + #endif /* _ROUTE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index d5942887707..14183883e8e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1370,4 +1370,16 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign } #endif +extern void sk_init(void); + +#ifdef CONFIG_SYSCTL +extern struct ctl_table core_table[]; +extern int sysctl_optmem_max; +#endif + +#ifdef CONFIG_PROC_FS +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; +#endif + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d958260af23..d6bcf1317a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1183,4 +1183,16 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern struct request_sock_ops tcp_request_sock_ops; + +extern int tcp_v4_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int tcp4_proc_init(void); +extern void tcp4_proc_exit(void); +#endif + +extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_init(void); + #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761db..107b9d791a1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -94,6 +94,11 @@ struct udp_iter_state { struct seq_operations seq_ops; }; +#ifdef CONFIG_PROC_FS extern int udp_proc_register(struct udp_seq_afinfo *afinfo); extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); + +extern int udp4_proc_init(void); +extern void udp4_proc_exit(void); +#endif #endif /* _UDP_H */ diff --git a/init/main.c b/init/main.c index c9c311cf177..ff410063e4e 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,6 @@ static int init(void *); extern void init_IRQ(void); -extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e0bbee549e..8e56e249554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -136,9 +137,6 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table vm_table[]; -#ifdef CONFIG_NET -extern ctl_table net_table[]; -#endif static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; diff --git a/net/802/p8023.c b/net/802/p8023.c index a0b61b40225..6368d3dce44 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -20,6 +20,7 @@ #include #include +#include /* * Place an 802.3 header on a packet. The driver will do the mac diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 36079630c49..700129556c1 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c @@ -10,9 +10,10 @@ * 2 of the License, or (at your option) any later version. */ +#include #include +#include #include -#include #ifdef CONFIG_TR extern int sysctl_tr_rif_timeout; diff --git a/net/core/dev.c b/net/core/dev.c index a3ed53cc4af..c01511e3d0c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt) spin_unlock_bh(&ptype_lock); } -extern void linkwatch_run_queue(void); - - - /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -1133,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -extern void skb_release_data(struct sk_buff *); - /* Keep head the same: replace data */ int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8f817ad9f54..2f278c8e474 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -9,23 +9,18 @@ #include #include #include +#include +#include #ifdef CONFIG_SYSCTL extern int netdev_max_backlog; -extern int netdev_budget; extern int weight_p; -extern int net_msg_cost; -extern int net_msg_burst; extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern __u32 sysctl_wmem_default; -extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; -extern int sysctl_optmem_max; -extern int sysctl_somaxconn; #ifdef CONFIG_NET_DIVERT extern char sysctl_divert_version[]; diff --git a/net/core/utils.c b/net/core/utils.c index 88eb8b68e26..7b5970fc9e4 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include diff --git a/net/core/wireless.c b/net/core/wireless.c index 19fa6a5389b..5caae2399f3 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v) return 0; } -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); - static struct seq_operations wireless_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f444a2f2675..87a052a9a84 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include #include -extern int __init netdev_boot_setup(char *str); - __setup("ether=", netdev_boot_setup); /* diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c index b81a6d53234..66b39fc342d 100644 --- a/net/ethernet/sysctl_net_ether.c +++ b/net/ethernet/sysctl_net_ether.c @@ -7,6 +7,7 @@ #include #include +#include ctl_table ether_table[] = { {0} diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 20f52b5f5de..5810f9d1491 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -859,10 +859,6 @@ static struct net_proto_family inet_family_ops = { .owner = THIS_MODULE, }; - -extern void tcp_init(void); -extern void tcp_v4_init(struct net_proto_family *); - /* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. */ @@ -1132,7 +1128,6 @@ static int __init init_ipv4_mibs(void) } static int ipv4_proc_init(void); -extern void ipfrag_init(void); /* * IP protocol layer initialiser @@ -1253,19 +1248,10 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -extern int fib_proc_init(void); -extern void fib_proc_exit(void); #ifdef CONFIG_IP_FIB_TRIE extern int fib_stat_proc_init(void); extern void fib_stat_proc_exit(void); #endif -extern int ip_misc_proc_init(void); -extern int raw_proc_init(void); -extern void raw_proc_exit(void); -extern int tcp4_proc_init(void); -extern void tcp4_proc_exit(void); -extern int udp4_proc_init(void); -extern void udp4_proc_exit(void); static int __init ipv4_proc_init(void) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 3fd49f4282a..c1b42b5257f 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 3c513ceaca7..4410b9dc03e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ddb1aedbdc6..aca088b3707 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case IP_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct ip_msfilter *msf; @@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct sockaddr_in *psin; struct ip_msfilter *msf = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3eadbb27187..f7943ba1f43 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,6 @@ static int fold_prot_inuse(struct proto *proto) */ static int sockstat_seq_show(struct seq_file *seq, void *v) { - /* From net/socket.c */ - extern void socket_seq_show(struct seq_file *seq); - socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8692cb9d4bd..a34e60ea48a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; } -extern struct request_sock_ops tcp_request_sock_ops; - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ce47a345ecc..65268562351 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -19,36 +21,6 @@ /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; -/* From icmp.c */ -extern int sysctl_icmp_echo_ignore_all; -extern int sysctl_icmp_echo_ignore_broadcasts; -extern int sysctl_icmp_ignore_bogus_error_responses; -extern int sysctl_icmp_errors_use_inbound_ifaddr; - -/* From ip_fragment.c */ -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - -/* From icmp.c */ -extern int sysctl_icmp_ratelimit; -extern int sysctl_icmp_ratemask; - -/* From igmp.c */ -extern int sysctl_igmp_max_memberships; -extern int sysctl_igmp_max_msf; - -/* From inetpeer.c */ -extern int inet_peer_threshold; -extern int inet_peer_minttl; -extern int inet_peer_maxttl; -extern int inet_peer_gc_mintime; -extern int inet_peer_gc_maxtime; - #ifdef CONFIG_SYSCTL static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; struct ipv4_config ipv4_config; -extern ctl_table ipv4_route_table[]; - #ifdef CONFIG_SYSCTL static @@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) +static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen, + void **context) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ebb8654e3de..1afb080bdf0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4229,7 +4229,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0, 0); + tcp_ack_saw_tstamp(sk, NULL, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 97bbf595230..13dfb391cdf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,6 @@ #include #include -extern int sysctl_ip_dynaddr; int sysctl_tcp_tw_reuse; int sysctl_tcp_low_latency; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 493abf94bcf..937ad32db77 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &maddr); } -void addrconf_join_anycast(struct inet6_ifaddr *ifp) +static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp) ipv6_dev_ac_inc(ifp->idev->dev, &addr); } -void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7df2ccb380d..4f8795af2ed 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -67,23 +67,6 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -/* IPv6 procfs goodies... */ - -#ifdef CONFIG_PROC_FS -extern int raw6_proc_init(void); -extern void raw6_proc_exit(void); -extern int tcp6_proc_init(void); -extern void tcp6_proc_exit(void); -extern int udp6_proc_init(void); -extern void udp6_proc_exit(void); -extern int ipv6_misc_proc_init(void); -extern void ipv6_misc_proc_exit(void); -extern int ac6_proc_init(void); -extern void ac6_proc_exit(void); -extern int if6_proc_init(void); -extern void if6_proc_exit(void); -#endif - int sysctl_ipv6_bindv6only; /* The inetsw table contains everything that inet_create needs to @@ -505,11 +488,6 @@ static struct net_proto_family inet6_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void ipv6_sysctl_register(void); -extern void ipv6_sysctl_unregister(void); -#endif - /* Same as inet6_dgram_ops, sans udp_poll. */ static struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, @@ -676,8 +654,6 @@ static void cleanup_ipv6_mibs(void) snmp6_mib_free((void **)udp_stats_in6); } -extern int ipv6_misc_proc_init(void); - static int __init inet6_init(void) { struct sk_buff *dummy_skb; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76fe23925d7..7516b8829a9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) return 0; } -extern int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr); -extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); -extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); - - int ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -446,7 +439,6 @@ done: } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_mld_max_msf; struct group_filter *gsf; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ea494ab4e0..5d5bbb49ec7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -int ip6_pkt_discard(struct sk_buff *skb) +static int ip6_pkt_discard(struct sk_buff *skb) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); @@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb) return 0; } -int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sk_buff *skb) { skb->dev = skb->dst->dev; return ip6_pkt_discard(skb); @@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) return arg.len; } -extern struct rt6_statistics rt6_stats; - static int rt6_stats_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e553e5b80d6..c3123c9e1a8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -770,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -int __init ipip6_fb_tunnel_init(struct net_device *dev) +static int __init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = dev->priv; struct iphdr *iph = &tunnel->parms.iph; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3a18e0e6ffe..8eff9fa1e98 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,9 +14,6 @@ #include #include -extern ctl_table ipv6_route_table[]; -extern ctl_table ipv6_icmp_table[]; - #ifdef CONFIG_SYSCTL static ctl_table ipv6_table[] = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fb291b81cf6..794734f1d23 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1910,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk) static int tcp_v6_destroy_sock(struct sock *sk) { - extern int tcp_v4_destroy_sock(struct sock *sk); - tcp_v4_destroy_sock(sk); return inet6_destroy_sock(sk); } @@ -2123,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -extern struct proto_ops inet6_stream_ops; - static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c348307e577..67d9a04b690 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1054,8 +1054,6 @@ struct proto udpv6_prot = { .obj_size = sizeof(struct udp6_sock), }; -extern struct proto_ops inet6_dgram_ops; - static struct inet_protosw udpv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 180e383f707..34b3bb86840 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier = { }; extern struct datalink_proto *make_EII_client(void); -extern struct datalink_proto *make_8023_client(void); extern void destroy_EII_client(struct datalink_proto *); -extern void destroy_8023_client(struct datalink_proto *); static unsigned char ipx_8022_type = 0xE0; static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; diff --git a/net/socket.c b/net/socket.c index 5f76ab8a159..ce69b7862f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include #include @@ -724,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return __sock_sendmsg(iocb, sock, &x->async_msg, size); } -ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) { struct socket *sock; int flags; @@ -948,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) return sock->ops->mmap(file, sock, vma); } -int sock_close(struct inode *inode, struct file *filp) +static int sock_close(struct inode *inode, struct file *filp) { /* * It was possible the inode is NULL we were @@ -2027,9 +2029,6 @@ int sock_unregister(int family) return 0; } - -extern void sk_init(void); - void __init sock_init(void) { /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 3f6e31069c5..c5241fcbb96 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -17,17 +17,15 @@ #include #ifdef CONFIG_INET -extern struct ctl_table ipv4_table[]; +#include #endif -extern struct ctl_table core_table[]; - #ifdef CONFIG_NET -extern struct ctl_table ether_table[]; +#include #endif #ifdef CONFIG_TR -extern struct ctl_table tr_table[]; +#include #endif struct ctl_table net_table[] = { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index bc4c44552c1..41feca3bef8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); -#else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} -#endif - static int __init af_unix_init(void) { int rc = -1; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index c974dac4580..690ffa5d5bf 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -12,7 +12,7 @@ #include #include -extern int sysctl_unix_max_dgram_qlen; +#include static ctl_table unix_table[] = { { -- cgit v1.2.3-70-g09d2 From 764d8a9f240729534a1d8a0ffd39e722cf5cc5af Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:06 -0700 Subject: [NETFILTER]: Add IPv6 REJECT target Originally written by Yasuyuki Kozakai , taken from netfilter patch-o-matic and fixed up to work with current kernels. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6t_REJECT.h | 18 ++ net/ipv6/netfilter/Kconfig | 10 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 284 +++++++++++++++++++++++++++++ 4 files changed, 313 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_REJECT.h create mode 100644 net/ipv6/netfilter/ip6t_REJECT.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h new file mode 100644 index 00000000000..6be6504162b --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -0,0 +1,18 @@ +#ifndef _IP6T_REJECT_H +#define _IP6T_REJECT_H + +enum ip6t_reject_with { + IP6T_ICMP6_NO_ROUTE, + IP6T_ICMP6_ADM_PROHIBITED, + IP6T_ICMP6_NOT_NEIGHBOUR, + IP6T_ICMP6_ADDR_UNREACH, + IP6T_ICMP6_PORT_UNREACH, + IP6T_ICMP6_ECHOREPLY, + IP6T_TCP_RESET +}; + +struct ip6t_reject_info { + u_int32_t with; /* reject type */ +}; + +#endif /*_IP6T_REJECT_H*/ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index cd1551983c6..8a10c2d0d15 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -199,6 +199,16 @@ config IP6_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP6_NF_FILTER + help + The REJECT target allows a filtering rule to specify that an ICMPv6 + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. + # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 847651dbcd2..70f6ba61010 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,4 +24,5 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c new file mode 100644 index 00000000000..14316c3ebde --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -0,0 +1,284 @@ +/* + * IP6 tables REJECT target module + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on net/ipv4/netfilter/ipt_REJECT.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Yasuyuki KOZAKAI "); +MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_LICENSE("GPL"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* Send RST reply */ +static void send_reset(struct sk_buff *oldskb) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; + struct dst_entry *dst = NULL; + u8 proto; + struct flowi fl; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { + DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + DEBUGP("ip6t_REJECT: RST is set\n"); + return; + } + + /* Check checksum. */ + if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, + skb_checksum(oldskb, tcphoff, otcplen, 0))) { + DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + return; + } + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); + fl.fl_ip_sport = otcph.dest; + fl.fl_ip_dport = otcph.source; + dst = ip6_route_output(NULL, &fl); + if (dst == NULL) + return; + if (dst->error || + xfrm_lookup(&dst, &fl, NULL, 0)) { + dst_release(dst); + return; + } + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + if (net_ratelimit()) + printk("ip6t_REJECT: Can't alloc skb\n"); + dst_release(dst); + return; + } + + nskb->dst = dst; + + skb_reserve(nskb, hh_len + dst->header_len); + + ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) + skb_put(nskb, sizeof(struct ipv6hdr)); + ip6h->version = 6; + ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->payload_len = htons(sizeof(struct tcphdr)); + ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); + ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); + + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, + &nskb->nh.ipv6h->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial((char *)tcph, + sizeof(struct tcphdr), 0)); + + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, + dst_output); +} + +static inline void +send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) +{ + if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = &loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); +} + +static unsigned int reject6_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_reject_info *reject = targinfo; + + DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + /* WARNING: This code causes reentry within ip6tables. + This means that the ip6tables jump stack is now crap. We + must return an absolute verdict. --RR */ + switch (reject->with) { + case IP6T_ICMP6_NO_ROUTE: + send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + break; + case IP6T_ICMP6_ADM_PROHIBITED: + send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + break; + case IP6T_ICMP6_NOT_NEIGHBOUR: + send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + break; + case IP6T_ICMP6_ADDR_UNREACH: + send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + break; + case IP6T_ICMP6_PORT_UNREACH: + send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + break; + case IP6T_ICMP6_ECHOREPLY: + /* Do nothing */ + break; + case IP6T_TCP_RESET: + send_reset(*pskb); + break; + default: + if (net_ratelimit()) + printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); + break; + } + + return NF_DROP; +} + +static int check(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip6t_reject_info *rejinfo = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { + DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only allow these for packet filtering. */ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename); + return 0; + } + + if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN) + | (1 << NF_IP6_FORWARD) + | (1 << NF_IP6_LOCAL_OUT))) != 0) { + DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { + printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); + return 0; + } else if (rejinfo->with == IP6T_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ipv6.proto != IPPROTO_TCP + || (e->ipv6.invflags & IP6T_INV_PROTO)) { + DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ip6t_target ip6t_reject_reg = { + .name = "REJECT", + .target = reject6_target, + .checkentry = check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_reject_reg); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3-70-g09d2 From 05465343bf74e00c8c2c5a310740157de3149f27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:43 -0700 Subject: [NETFILTER]: Add goto target Originally written by Henrik Nordstrom , taken from netfilter patch-o-matic and added ip6_tables support. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 ++- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 12ce47808e7..d19d65cf453 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -109,7 +109,8 @@ struct ipt_counters /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ -#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ +#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ +#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f1ce3b00985..58c72a52dc6 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -111,7 +111,8 @@ struct ip6t_counters #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ #define IP6T_F_TOS 0x02 /* Match the TOS. */ -#define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ +#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ +#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ /* Values for "inv" field in struct ip6t_ip6. */ #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ff8d85d2070..eef99a1b5de 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -340,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = (void *)e + e->next_offset; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 41a67cf6e33..1cb8adb2787 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -433,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ipv6.flags & IP6T_F_GOTO)) { /* Save old back ptr in next entry */ struct ip6t_entry *next = (void *)e + e->next_offset; -- cgit v1.2.3-70-g09d2 From ba89966c1984513f4f2cc0a6c182266be44ddd03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2005 12:05:31 -0700 Subject: [NET]: use __read_mostly on kmem_cache_t , DEFINE_SNMP_STAT pointers This patch puts mostly read only data in the right section (read_mostly), to help sharing of these data between CPUS without memory ping pongs. On one of my production machine, tcp_statistics was sitting in a heavily modified cache line, so *every* SNMP update had to force a reload. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- net/core/flow.c | 2 +- net/core/skbuff.c | 4 ++-- net/dccp/ccids/ccid3.c | 2 +- net/dccp/proto.c | 2 +- net/decnet/dn_table.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/inetpeer.c | 2 +- net/ipv4/ip_input.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/ipvs/ip_vs_conn.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 4 ++-- net/ipv4/netfilter/ipt_hashlimit.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/udp.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/sctp/protocol.c | 6 +++--- net/socket.c | 4 ++-- net/sunrpc/rpc_pipe.c | 4 ++-- net/sunrpc/sched.c | 8 ++++---- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 29 files changed, 39 insertions(+), 39 deletions(-) (limited to 'net/ipv6') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e6c2200b7ca..24396b914d1 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,7 +23,7 @@ #include #include "br_private.h" -static kmem_cache_t *br_fdb_cache; +static kmem_cache_t *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); diff --git a/net/core/flow.c b/net/core/flow.c index f289570b15a..7e95b39de9f 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep; +static kmem_cache_t *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b853a9b29eb..f80a2878561 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,8 +68,8 @@ #include #include -static kmem_cache_t *skbuff_head_cache; -static kmem_cache_t *skbuff_fclone_cache; +static kmem_cache_t *skbuff_head_cache __read_mostly; +static kmem_cache_t *skbuff_fclone_cache __read_mostly; struct timeval __read_mostly skb_tv_base; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fe4cc85f5bc..cf93b019ecb 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -85,7 +85,7 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static kmem_cache_t *ccid3_loss_interval_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab __read_mostly; static inline struct ccid3_loss_interval_hist_entry * ccid3_loss_interval_hist_entry_new(const unsigned int __nocast prio) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 600dda51d99..f97e92ea34f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -39,7 +39,7 @@ #include "ccid.h" #include "dccp.h" -DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; atomic_t dccp_orphan_count = ATOMIC_INIT(0); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 73a88489ff3..eeba56f9932 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n static DEFINE_RWLOCK(dn_fib_tables_lock); struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; -static kmem_cache_t *dn_hash_kmem; +static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5810f9d1491..bf147f8db39 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -113,7 +113,7 @@ #include #endif -DEFINE_SNMP_STAT(struct linux_mib, net_statistics); +DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; extern void ip_mc_drop_socket(struct sock *sk); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b10d6bb5ef3..2a8c9afc369 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -45,8 +45,8 @@ #include "fib_lookup.h" -static kmem_cache_t *fn_hash_kmem; -static kmem_cache_t *fn_alias_kmem; +static kmem_cache_t *fn_hash_kmem __read_mostly; +static kmem_cache_t *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff21748248e..b2dea4e5da7 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -166,7 +166,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); static void trie_dump_seq(struct seq_file *seq, struct trie *t); -static kmem_cache_t *fn_alias_kmem; +static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 25f66b750fd..24eb56ae1b5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* * Statistics */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); +DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 4410b9dc03e..f84ba9c9655 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -73,7 +73,7 @@ /* Exported for inet_getid inline function. */ DEFINE_SPINLOCK(inet_peer_idlock); -static kmem_cache_t *peer_cachep; +static kmem_cache_t *peer_cachep __read_mostly; #define node_height(x) x->avl_height static struct inet_peer peer_fake_node = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 220a8b5920e..473d0f2b2e0 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -150,7 +150,7 @@ * SNMP management statistics */ -DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics); +DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; /* * Process Router Attention IP option diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dc806b57842..9dbf5909f3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); In this case data path is free of exclusive locks at all. */ -static kmem_cache_t *mrt_cachep; +static kmem_cache_t *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index d0145a8b155..e11952ea17a 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -40,7 +40,7 @@ static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ -static kmem_cache_t *ip_vs_conn_cachep; +static kmem_cache_t *ip_vs_conn_cachep __read_mostly; /* counter for current IPVS connections */ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 285743bfbed..a0648600190 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -70,8 +70,8 @@ static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size = 0; int ip_conntrack_max; struct list_head *ip_conntrack_hash; -static kmem_cache_t *ip_conntrack_cachep; -static kmem_cache_t *ip_conntrack_expect_cachep; +static kmem_cache_t *ip_conntrack_cachep __read_mostly; +static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 564b49bfebc..2dd1cccbdab 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -94,7 +94,7 @@ struct ipt_hashlimit_htable { static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep; +static kmem_cache_t *hashlimit_cachep __read_mostly; static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68626de6d69..02fdda68718 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -269,7 +269,7 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; -DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); +DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; atomic_t tcp_orphan_count = ATOMIC_INIT(0); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3a5bbbe7dd8..e5beca7de86 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -113,7 +113,7 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); +DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ff685f229b6..5176fc655ea 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,7 +67,7 @@ #include #include -DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); +DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; /* * The ICMP socket(s). This is the most convenient way to flow control diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1b354aa9793..16af874c9e8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -49,7 +49,7 @@ struct rt6_statistics rt6_stats; -static kmem_cache_t * fib6_node_kmem; +static kmem_cache_t * fib6_node_kmem __read_mostly; enum fib_walk_state_t { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7516b8829a9..76466af8331 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,7 +55,7 @@ #include -DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); +DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 67d9a04b690..390d750449c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -59,7 +59,7 @@ #include #include -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); +DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; /* Grrr, addr_type already calculated by caller, but I don't want * to add some silly "cookie" argument to this method just for that. diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 60c26c87277..fbef7826a74 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -79,7 +79,7 @@ static u32 xfrm6_tunnel_spi; #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff -static kmem_cache_t *xfrm6_tunnel_spi_kmem; +static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7d8ec652634..e7025be7769 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -62,7 +62,7 @@ /* Global data structures. */ struct sctp_globals sctp_globals; struct proc_dir_entry *proc_net_sctp; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); +DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -78,8 +78,8 @@ static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; -kmem_cache_t *sctp_chunk_cachep; -kmem_cache_t *sctp_bucket_cachep; +kmem_cache_t *sctp_chunk_cachep __read_mostly; +kmem_cache_t *sctp_bucket_cachep __read_mostly; extern int sctp_snmp_proc_init(void); extern int sctp_snmp_proc_exit(void); diff --git a/net/socket.c b/net/socket.c index ce69b7862f5..94fe638b4d7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -274,7 +274,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t * sock_inode_cachep; +static kmem_cache_t * sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { @@ -333,7 +333,7 @@ static struct super_block *sockfs_get_sb(struct file_system_type *fs_type, return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } -static struct vfsmount *sock_mnt; +static struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 554f224c044..fe1a73ce6cf 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -28,13 +28,13 @@ #include #include -static struct vfsmount *rpc_mount; +static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; -static kmem_cache_t *rpc_inode_cachep; +static kmem_cache_t *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2d9eb7fbd52..f3104035e35 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -34,10 +34,10 @@ static int rpc_task_id; #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) -static kmem_cache_t *rpc_task_slabp; -static kmem_cache_t *rpc_buffer_slabp; -static mempool_t *rpc_task_mempool; -static mempool_t *rpc_buffer_mempool; +static kmem_cache_t *rpc_task_slabp __read_mostly; +static kmem_cache_t *rpc_buffer_slabp __read_mostly; +static mempool_t *rpc_task_mempool __read_mostly; +static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index c58a6f05a0b..2407a707232 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -12,7 +12,7 @@ #include #include -static kmem_cache_t *secpath_cachep; +static kmem_cache_t *secpath_cachep __read_mostly; void __secpath_destroy(struct sec_path *sp) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d65ed8684fc..83c8135e176 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache; +static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; static struct list_head xfrm_policy_gc_list = -- cgit v1.2.3-70-g09d2 From 0ac4f893f20ed524198da5ebf591fc0b9e2ced2f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:30 -0700 Subject: [NETFILTER6]: Add new ip6tables HOPLIMIT target This target allows users to modify the hoplimit header field of the IPv6 header. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6t_HL.h | 22 ++++++ net/ipv6/netfilter/Kconfig | 16 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_HL.c | 118 +++++++++++++++++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_HL.h create mode 100644 net/ipv6/netfilter/ip6t_HL.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h new file mode 100644 index 00000000000..afb7813d45a --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -0,0 +1,22 @@ +/* Hop Limit modification module for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module */ + +#ifndef _IP6T_HL_H +#define _IP6T_HL_H + +enum { + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC +}; + +#define IP6T_HL_MAXMODE IP6T_HL_DEC + +struct ip6t_HL_info { + u_int8_t mode; + u_int8_t hop_limit; +}; + + +#endif diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 8a10c2d0d15..216fbe1ac65 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -239,6 +239,22 @@ config IP6_NF_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_HL + tristate 'HL (hoplimit) target support' + depends on IP6_NF_MANGLE + help + This option adds a `HL' target, which enables the user to decrement + the hoplimit value of the IPv6 header or set it to a given (lower) + value. + + While it is safe to decrement the hoplimit value, this option also + enables functionality to increment and set the hoplimit value of the + IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since + you can easily create immortal packets that loop forever on the + network. + + To compile it as a module, choose M here. If unsure, say N. + #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES config IP6_NF_RAW tristate 'raw table support (required for TRACE)' diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 70f6ba61010..bd9a16a5cbb 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c new file mode 100644 index 00000000000..8f5549b7272 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -0,0 +1,118 @@ +/* + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int ip6t_hl_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = targinfo; + u_int16_t diffs[2]; + int new_hl; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + ip6h = (*pskb)->nh.ipv6h; + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + if (new_hl != ip6h->hop_limit) { + diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + ip6h->hop_limit = new_hl; + diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); + } + + return IP6T_CONTINUE; +} + +static int ip6t_hl_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip6t_HL_info *info = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) { + printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ip6t_HL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "ip6t_HL: can only be called from " + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_HL = { + .name = "HL", + .target = ip6t_hl_target, + .checkentry = ip6t_hl_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ip6t_register_target(&ip6t_HL); +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_HL); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3-70-g09d2