diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 08:26:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 08:26:19 -0700 |
commit | d002ec481c24f325ed6cfcb7810d317c015dd1b5 (patch) | |
tree | 71fbdce6aab6ffb5f8590e7ddde7c095a7fa31ab /net/ipv4 | |
parent | 5a96c5d0c58ead9a0ece03ffe1c116dea6dafe9c (diff) | |
parent | 0a69452cb45add0841c2bc1e75c25f6bd4f1d8d9 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
[XFRM]: BEET mode
[TCP]: Kill warning in tcp_clean_rtx_queue().
[NET_SCHED]: Remove old estimator implementation
[ATM]: [zatm] always *pcr in alloc_shaper()
[ATM]: [ambassador] Change the return type to reflect reality
[ATM]: kmalloc to kzalloc patches for drivers/atm
[TIPC]: fix printk warning
[XFRM]: Clearing xfrm_policy_count[] to zero during flush is incorrect.
[XFRM] STATE: Use destination address for src hash.
[NEIGH]: always use hash_mask under tbl lock
[UDP]: Fix MSG_PROBE crash
[UDP6]: Fix flowi clobbering
[NET_SCHED]: Revert "HTB: fix incorrect use of RB_EMPTY_NODE"
[NETFILTER]: ebt_mark: add or/and/xor action support to mark target
[NETFILTER]: ipt_REJECT: remove largely duplicate route_reverse function
[NETFILTER]: Honour source routing for LVS-NAT
[NETFILTER]: add type parameter to ip_route_me_harder
[NETFILTER]: Kconfig: fix xt_physdev dependencies
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 9 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 26 | ||||
-rw-r--r-- | net/ipv4/ipcomp.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_core.c | 10 | ||||
-rw-r--r-- | net/ipv4/netfilter.c | 9 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_nat_standalone.c | 3 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_REJECT.c | 97 | ||||
-rw-r--r-- | net/ipv4/netfilter/iptable_mangle.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/ipv4/xfrm4_mode_beet.c | 139 |
12 files changed, 214 insertions, 92 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d172a980444..5572071af73 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL If unsure, say Y. +config INET_XFRM_MODE_BEET + tristate "IP: IPsec BEET mode" + default y + select XFRM + ---help--- + Support for IPsec BEET mode. + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f66049e28ae..15645c51520 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o +obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 13b29360d10..b5c205b5766 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (x->props.mode == XFRM_MODE_TRANSPORT || + x->props.mode == XFRM_MODE_BEET) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - - if (x->props.mode == XFRM_MODE_TUNNEL) { - mtu = ALIGN(mtu + 2, blksize); - } else { - /* The worst case. */ + int enclen = 0; + + switch (x->props.mode) { + case XFRM_MODE_TUNNEL: + mtu = ALIGN(mtu +2, blksize); + break; + default: + case XFRM_MODE_TRANSPORT: + /* The worst case */ mtu = ALIGN(mtu + 2, 4) + blksize - 4; + break; + case XFRM_MODE_BEET: + /* The worst case. */ + enclen = IPV4_BEET_PHMAXLEN; + mtu = ALIGN(mtu + enclen + 2, blksize); + break; } + if (esp->conf.padlen) mtu = ALIGN(mtu, esp->conf.padlen); - return mtu + x->props.header_len + esp->auth.icv_trunc_len; + return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 2017d36024d..3839b706142 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; + u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (t == NULL) @@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = XFRM_MODE_TUNNEL; + if (x->props.mode == XFRM_MODE_BEET) + mode = x->props.mode; + t->props.mode = mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 6dee03935f7..1445bb47fea 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, skb->nh.iph->saddr = cp->vaddr; ip_send_check(skb->nh.iph); + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ + if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + goto drop; + skb = *pskb; + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 5ac15379a0c..e2005c6810a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -8,7 +8,7 @@ #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) +int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) { struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt; @@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb) struct dst_entry *odst; unsigned int hh_len; + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(iph->saddr); + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + if (addr_type == RTN_LOCAL) { fl.nl_u.ip4_u.daddr = iph->daddr; fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); @@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb); + return ip_route_me_harder(pskb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 021395b6746..d85d2de5044 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum, ct->tuplehash[!dir].tuple.src.u.all #endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; } return ret; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index fd0c05efed8..ad0312d0e4f 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module"); #define DEBUGP(format, args...) #endif -static inline struct rtable *route_reverse(struct sk_buff *skb, - struct tcphdr *tcph, int hook) -{ - struct iphdr *iph = skb->nh.iph; - struct dst_entry *odst; - struct flowi fl = {}; - struct rtable *rt; - - /* We don't require ip forwarding to be enabled to be able to - * send a RST reply for bridged traffic. */ - if (hook != NF_IP_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) { - fl.nl_u.ip4_u.daddr = iph->saddr; - if (hook == NF_IP_LOCAL_IN) - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->daddr; - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - - odst = skb->dst; - if (ip_route_input(skb, iph->saddr, iph->daddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return NULL; - } - dst_release(&rt->u.dst); - rt = (struct rtable *)skb->dst; - skb->dst = odst; - - fl.nl_u.ip4_u.daddr = iph->saddr; - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - } - - if (rt->u.dst.error) { - dst_release(&rt->u.dst); - return NULL; - } - - fl.proto = IPPROTO_TCP; - fl.fl_ip_sport = tcph->dest; - fl.fl_ip_dport = tcph->source; - security_skb_classify_flow(skb, &fl); - - xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); - - return rt; -} - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; - struct rtable *rt; __be16 tmp_port; __be32 tmp_addr; int needs_ack; - int hh_len; + unsigned int addr_type; /* IP header checks: fragment. */ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) @@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) return; - if ((rt = route_reverse(oldskb, oth, hook)) == NULL) - return; - - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - /* We need a linear, writeable skb. We also need to expand headroom in case hh_len of incoming interface < hh_len of outgoing interface */ - nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), + nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb), GFP_ATOMIC); - if (!nskb) { - dst_release(&rt->u.dst); + if (!nskb) return; - } - - dst_release(nskb->dst); - nskb->dst = &rt->u.dst; /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); @@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->window = 0; tcph->urg_ptr = 0; + /* Set DF, id = 0 */ + nskb->nh.iph->frag_off = htons(IP_DF); + nskb->nh.iph->id = 0; + + addr_type = RTN_UNSPEC; + if (hook != NF_IP_FORWARD +#ifdef CONFIG_BRIDGE_NETFILTER + || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) +#endif + ) + addr_type = RTN_LOCAL; + + if (ip_route_me_harder(&nskb, addr_type)) + goto free_nskb; + /* Adjust TCP checksum */ nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; @@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->nh.iph->daddr, csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); - - /* Adjust IP TTL, DF */ + /* Adjust IP TTL */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); - /* Set DF, id = 0 */ - nskb->nh.iph->frag_off = htons(IP_DF); - nskb->nh.iph->id = 0; /* Adjust IP checksum */ nskb->nh.iph->check = 0; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index e62ea2bb9c0..b91f3582359 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook, || (*pskb)->nfmark != nfmark #endif || (*pskb)->nh.iph->tos != tos)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; return ret; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f884cea14f..cf06accbe68 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) u32 pkts_acked = 0; void (*rtt_sample)(struct sock *sk, u32 usrtt) = icsk->icsk_ca_ops->rtt_sample; - struct timeval tv; + struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6d6142f9c47..865d75214a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -675,6 +675,8 @@ do_append_data: udp_flush_pending_frames(sk); else if (!corkreq) err = udp_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; release_sock(sk); out: diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c new file mode 100644 index 00000000000..89cf59ea7bb --- /dev/null +++ b/net/ipv4/xfrm4_mode_beet.c @@ -0,0 +1,139 @@ +/* + * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4. + * + * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> + * Miika Komu <miika@iki.fi> + * Herbert Xu <herbert@gondor.apana.org.au> + * Abhinav Pathak <abhinav.pathak@hiit.fi> + * Jeff Ahrenholz <ahrenholz@gmail.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/ip.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + * The following fields in it shall be filled in by x->type->output: + * tot_len + * check + * + * On exit, skb->h will be set to the start of the payload to be processed + * by x->type->output and skb->nh will be set to the top IP header. + */ +static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph, *top_iph = NULL; + int hdrlen, optlen; + + iph = skb->nh.iph; + skb->h.ipiph = iph; + + hdrlen = 0; + optlen = iph->ihl * 4 - sizeof(*iph); + if (unlikely(optlen)) + hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen); + top_iph = skb->nh.iph; + hdrlen = iph->ihl * 4 - optlen; + skb->h.raw += hdrlen; + + memmove(top_iph, iph, hdrlen); + if (unlikely(optlen)) { + struct ip_beet_phdr *ph; + + BUG_ON(optlen < 0); + + ph = (struct ip_beet_phdr *)skb->h.raw; + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8; + ph->nexthdr = top_iph->protocol; + + top_iph->protocol = IPPROTO_BEETPH; + top_iph->ihl = sizeof(struct iphdr) / 4; + } + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + + return 0; +} + +static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + int phlen = 0; + int optlen = 0; + __u8 ph_nexthdr = 0, protocol = 0; + int err = -EINVAL; + + protocol = iph->protocol; + + if (unlikely(iph->protocol == IPPROTO_BEETPH)) { + struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1); + + if (!pskb_may_pull(skb, sizeof(*ph))) + goto out; + + phlen = ph->hdrlen * 8; + optlen = phlen - ph->padlen - sizeof(*ph); + if (optlen < 0 || optlen & 3 || optlen > 250) + goto out; + + if (!pskb_may_pull(skb, phlen)) + goto out; + + ph_nexthdr = ph->nexthdr; + } + + skb_push(skb, sizeof(*iph) - phlen + optlen); + memmove(skb->data, skb->nh.raw, sizeof(*iph)); + skb->nh.raw = skb->data; + + iph = skb->nh.iph; + iph->ihl = (sizeof(*iph) + optlen) / 4; + iph->tot_len = htons(skb->len); + iph->daddr = x->sel.daddr.a4; + iph->saddr = x->sel.saddr.a4; + if (ph_nexthdr) + iph->protocol = ph_nexthdr; + else + iph->protocol = protocol; + iph->check = 0; + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + err = 0; +out: + return err; +} + +static struct xfrm_mode xfrm4_beet_mode = { + .input = xfrm4_beet_input, + .output = xfrm4_beet_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_BEET, +}; + +static int __init xfrm4_beet_init(void) +{ + return xfrm_register_mode(&xfrm4_beet_mode, AF_INET); +} + +static void __exit xfrm4_beet_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET); + BUG_ON(err); +} + +module_init(xfrm4_beet_init); +module_exit(xfrm4_beet_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET); |