From 5e2afba4ecd7931ea06e6fa116ab28e6943dbd42 Mon Sep 17 00:00:00 2001 From: Paul Guo Date: Mon, 14 Nov 2011 19:00:54 +0800 Subject: netfilter: possible unaligned packet header in ip_route_me_harder This patch tries to fix the following issue in netfilter: In ip_route_me_harder(), we invoke pskb_expand_head() that rellocates new header with additional head room which can break the alignment of the original packet header. In one of my NAT test case, the NIC port for internal hosts is configured with vlan and the port for external hosts is with general configuration. If we ping an external "unknown" hosts from an internal host, an icmp packet will be sent. We find that in icmp_send()->...->ip_route_me_harder()->pskb_expand_head(), hh_len=18 and current headroom (skb_headroom(skb)) of the packet is 16. After calling pskb_expand_head() the packet header becomes to be unaligned and then our system (arch/tile) panics immediately. Signed-off-by: Paul Guo Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9899619ab9b..4f47e064e26 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -64,7 +64,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) return -1; return 0; -- cgit v1.2.3-70-g09d2 From de68dca1816660b0d3ac89fa59ffb410007a143f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 26 Nov 2011 12:13:44 +0000 Subject: inet: add a redirect generation id in inetpeer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now inetpeer is the place where we cache redirect information for ipv4 destinations, we must be able to invalidate informations when a route is added/removed on host. As inetpeer is not yet namespace aware, this patch adds a shared redirect_genid, and a per inetpeer redirect_genid. This might be changed later if inetpeer becomes ns aware. Cache information for one inerpeer is valid as long as its redirect_genid has the same value than global redirect_genid. Reported-by: Arkadiusz Miśkiewicz Tested-by: Arkadiusz Miśkiewicz Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 1 + net/ipv4/route.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 78c83e62218..e9ff3fc5e68 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -35,6 +35,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + int redirect_genid; unsigned long rate_last; unsigned long pmtu_expires; u32 pmtu_orig; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fb47c8f0cd8..5c2847247f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -131,6 +131,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; +static int redirect_genid; /* * Interface to generic destination cache. @@ -837,6 +838,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); + redirect_genid++; } /* @@ -1391,8 +1393,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, peer = rt->peer; if (peer) { - if (peer->redirect_learned.a4 != new_gw) { + if (peer->redirect_learned.a4 != new_gw || + peer->redirect_genid != redirect_genid) { peer->redirect_learned.a4 = new_gw; + peer->redirect_genid = redirect_genid; atomic_inc(&__rt_peer_genid); } check_peer_redir(&rt->dst, peer); @@ -1701,6 +1705,8 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (peer) { check_peer_pmtu(dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { if (check_peer_redir(dst, peer)) @@ -1857,6 +1863,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, peer->metrics, false); check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; -- cgit v1.2.3-70-g09d2 From 218fa90f072e4aeff9003d57e390857f4f35513e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Nov 2011 20:05:55 +0000 Subject: ipv4: fix lockdep splat in rt_cache_seq_show After commit f2c31e32b378 (fix NULL dereferences in check_peer_redir()), dst_get_neighbour() should be guarded by rcu_read_lock() / rcu_read_unlock() section. Reported-by: Miles Lane Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5c2847247f5..57e01bc6094 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -417,9 +417,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) else { struct rtable *r = v; struct neighbour *n; - int len; + int len, HHUptod; + rcu_read_lock(); n = dst_get_neighbour(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->dst.dev ? r->dst.dev->name : "*", @@ -433,7 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, -1, - (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); -- cgit v1.2.3-70-g09d2 From efbc368dcc6426d5430b9b8eeda944cf2cb74b8c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 13:38:59 -0500 Subject: ipv4: Perform peer validation on cached route lookup. Otherwise we won't notice the peer GENID change. Reported-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 57e01bc6094..ca5e237df02 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1693,12 +1693,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +static struct rtable *ipv4_validate_peer(struct rtable *rt) { - struct rtable *rt = (struct rtable *) dst; - - if (rt_is_expired(rt)) - return NULL; if (rt->rt_peer_genid != rt_peer_genid()) { struct inet_peer *peer; @@ -1707,19 +1703,29 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) peer = rt->peer; if (peer) { - check_peer_pmtu(dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_genid != redirect_genid) peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) + if (check_peer_redir(&rt->dst, peer)) return NULL; } } rt->rt_peer_genid = rt_peer_genid(); } + return rt; +} + +static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +{ + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) + return NULL; + dst = (struct dst_entry *) ipv4_validate_peer(rt); return dst; } @@ -2374,6 +2380,9 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + rth = ipv4_validate_peer(rth); + if (!rth) + continue; if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2749,6 +2758,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + rth = ipv4_validate_peer(rth); + if (!rth) + continue; dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); -- cgit v1.2.3-70-g09d2 From 59c2cdae2791c0b2ee13d148edc6b771e7e7953f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Dec 2011 14:12:55 -0500 Subject: Revert "udp: remove redundant variable" This reverts commit 81d54ec8479a2c695760da81f05b5a9fb2dbe40a. If we take the "try_again" goto, due to a checksum error, the 'len' has already been truncated. So we won't compute the same values as the original code did. Reported-by: paul bilke Signed-off-by: David S. Miller --- net/ipv4/udp.c | 15 ++++++++------- net/ipv6/udp.c | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ab0966df1e2..5a65eeac1d2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1164,7 +1164,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -1186,9 +1186,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; /* @@ -1197,14 +1198,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, len); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), @@ -1233,7 +1234,7 @@ try_again: if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 846f4757eb8..8c254191518 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -340,7 +340,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -363,9 +363,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -376,14 +377,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov,len); + msg->msg_iov, copied ); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -432,7 +433,7 @@ try_again: datagram_recv_ctl(sk, msg, skb); } - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; -- cgit v1.2.3-70-g09d2 From d01ff0a049f749e0bf10a35bb23edd012718c8c2 Mon Sep 17 00:00:00 2001 From: "Peter Pan(潘卫平)" Date: Thu, 1 Dec 2011 15:47:06 +0000 Subject: ipv4: flush route cache after change accept_local After reset ipv4_devconf->data[IPV4_DEVCONF_ACCEPT_LOCAL] to 0, we should flush route cache, or it will continue receive packets with local source address, which should be dropped. Signed-off-by: Weiping Pan Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c6b5092f29a..65f01dc4756 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if ((new_value == 0) && (old_value != 0)) + rt_cache_flush(net, 0); } return ret; -- cgit v1.2.3-70-g09d2