From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:41:00 -0700 Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to incoming packets. The non-local source address check on output bites us again, as replies for transparently redirected traffic won't have a chance to leave the node. This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the route lookup for those replies. Transparent replies are enabled if the listening socket has the transparent socket flag set. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/ip.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index 250e6ef025a..90b27f634b7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -140,12 +140,15 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf) struct ip_reply_arg { struct kvec iov[1]; + int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; }; +#define IP_REPLY_ARG_NOSRCCHECK 1 + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -- cgit v1.2.3-70-g09d2 From 86b08d867d7de001ab224180ed7865fab93fd56e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:44:42 -0700 Subject: ipv4: Make Netfilter's ip_route_me_harder() non-local address compatible Netfilter's ip_route_me_harder() tries to re-route packets either generated or re-routed by Netfilter. This patch changes ip_route_me_harder() to handle packets from non-locally-bound sockets with IP_TRANSPARENT set as local and to set the appropriate flowi flags when re-doing the routing lookup. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 4 +++- net/ipv4/netfilter.c | 3 +++ net/ipv4/syncookies.c | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index 90b27f634b7..d678ea3d474 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -29,6 +29,7 @@ #include #include +#include struct sock; @@ -149,6 +150,11 @@ struct ip_reply_arg { #define IP_REPLY_ARG_NOSRCCHECK 1 +static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) +{ + return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; +} + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0c1ae68ee84..432c570c9f5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -335,6 +335,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d533a89e08d..d2a8f8bb78a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991..01671ad51ed 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,6 +20,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int type; type = inet_addr_type(&init_net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,6 +35,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; if (ip_route_output_key(&init_net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abba..929302b2ba9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -337,6 +338,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; -- cgit v1.2.3-70-g09d2 From 3c689b7320ae6f20dba6a8b71806a6c6fd604ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Oct 2008 14:18:04 -0700 Subject: inet: cleanup of local_port_range I noticed sysctl_local_port_range[] and its associated seqlock sysctl_local_port_range_lock were on separate cache lines. Moreover, sysctl_local_port_range[] was close to unrelated variables, highly modified, leading to cache misses. Moving these two variables in a structure can help data locality and moving this structure to read_mostly section helps sharing of this data among cpus. Cleanup of extern declarations (moved in include file where they belong), and use of inet_get_local_port_range() accessor instead of direct access to ports values. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++++ net/ipv4/inet_connection_sock.c | 16 +++++++++------- net/ipv4/sysctl_net_ipv4.c | 23 ++++++++++------------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index d678ea3d474..1cbccaf0de3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,10 @@ extern unsigned long snmp_fold_field(void *mib[], int offt); extern int snmp_mib_init(void *ptr[2], size_t mibsize); extern void snmp_mib_free(void *ptr[2]); +extern struct local_ports { + seqlock_t lock; + int range[2]; +} sysctl_local_ports; extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 21fcc5a9045..bd1278a2d82 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b79..276d047fb85 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -73,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -83,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) @@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, -- cgit v1.2.3-70-g09d2