From 7586eceb0abc0ea1c2b023e3e5d4dfd4ff40930a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2012 05:02:19 +0000 Subject: ipv4: tcp: dont cache output dst for syncookies Don't cache output dst for syncookies, as this adds pressure on IP route cache and rcu subsystem for no gain. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f9ee7417f6a..034ddbe42ad 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,17 +368,21 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req) + const struct request_sock *req, + bool nocache) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); + int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + if (nocache) + flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, + flags, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); -- cgit v1.2.3-70-g09d2 From 3e12939a2a67fbb4cbd962c3b9bc398c73319766 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 04:01:57 -0700 Subject: inet: Kill FLOWI_FLAG_PRECOW_METRICS. No longer needed. TCP writes metrics, but now in it's own special cache that does not dirty the route metrics. Therefore there is no longer any reason to pre-cow metrics in this way. Signed-off-by: David S. Miller --- include/net/flow.h | 5 ++--- include/net/inet_sock.h | 2 -- include/net/route.h | 2 -- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/route.c | 11 ++--------- net/ipv6/route.c | 2 +- 6 files changed, 6 insertions(+), 18 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/include/net/flow.h b/include/net/flow.h index bd524f59856..ce9cb7656b4 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -20,9 +20,8 @@ struct flowi_common { __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_PRECOW_METRICS 0x02 -#define FLOWI_FLAG_CAN_SLEEP 0x04 -#define FLOWI_FLAG_RT_NOCACHE 0x08 +#define FLOWI_FLAG_CAN_SLEEP 0x02 +#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ae17e1352d7..924d7b98ab6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; - if (sk->sk_protocol == IPPROTO_TCP) - flags |= FLOWI_FLAG_PRECOW_METRICS; return flags; } diff --git a/include/net/route.h b/include/net/route.h index 211e2665139..635d7a99d19 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -278,8 +278,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (protocol == IPPROTO_TCP) - flow_flags |= FLOWI_FLAG_PRECOW_METRICS; if (can_sleep) flow_flags |= FLOWI_FLAG_CAN_SLEEP; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 034ddbe42ad..76825be3b64 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + int flags = inet_sk_flowi_flags(sk); if (nocache) flags |= FLOWI_FLAG_RT_NOCACHE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e376354dcb6..d4834e2914a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1658,7 +1658,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1836,18 +1836,11 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, { struct inet_peer_base *base; struct inet_peer *peer; - int create = 0; - - /* If a peer entry exists for this destination, we must hook - * it up in order to get at cached metrics. - */ - if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) - create = 1; base = inetpeer_base_ptr(rt->_peer); BUG_ON(!base); - peer = inet_getpeer_v4(base, rt->rt_dst, create); + peer = inet_getpeer_v4(base, rt->rt_dst, 0); if (peer) { __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0c068475378..b7eb51e1a0e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1093,7 +1093,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; -- cgit v1.2.3-70-g09d2 From 80d0a69fc57715dc9080c0567df1ed911b78abea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:28:06 -0700 Subject: ipv4: Add helper inet_csk_update_pmtu(). This abstracts away the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). So we try to rebuild the socket cached route after the method invocation if necessary. This isn't used by SCTP because it needs to cache dsts per-transport, and thus will need it's own local version of this helper. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ net/dccp/ipv4.c | 11 ++------- net/ipv4/inet_connection_sock.c | 46 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 11 ++------- 4 files changed, 52 insertions(+), 18 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 291e7cee14e..2cf44b4ed2e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); + +extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 129ed8f7413..683902fcc8e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 76825be3b64..200d2180937 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif + +static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 *fl4; + struct rtable *rt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + fl4 = &fl->u.ip4; + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (IS_ERR(rt)) + rt = NULL; + if (rt) + sk_setup_caps(sk, &rt->dst); + rcu_read_unlock(); + + return &rt->dst; +} + +struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + struct inet_sock *inet = inet_sk(sk); + + if (!dst) { + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); + if (!dst) + goto out; + } + dst->ops->update_pmtu(dst, mtu); + + dst = __sk_dst_check(sk, 0); + if (!dst) + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); +out: + return dst; +} +EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7a0062cb4ed..b8e7e059540 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) if (sk->sk_state == TCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ -- cgit v1.2.3-70-g09d2 From 6700c2709c08d74ae2c3c29b84a30da012dbc7f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 03:29:28 -0700 Subject: net: Pass optional SKB and SK arguments to dst_ops->{update_pmtu,redirect}() This will be used so that we can compose a full flow key. Even though we have a route in this context, we need more. In the future the routes will be without destination address, source address, etc. keying. One ipv4 route will cover entire subnets, etc. In this environment we have to have a way to possess persistent storage for redirects and PMTU information. This persistent storage will exist in the FIB tables, and that's why we'll need to be able to rebuild a full lookup flow key here. Using that flow key will do a fib_lookup() and create/update the persistent entry. Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- include/net/dst_ops.h | 6 ++++-- net/bridge/br_netfilter.c | 6 ++++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/decnet/dn_route.c | 12 ++++++++---- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/xfrm4_policy.c | 10 ++++++---- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +++--- net/ipv6/route.c | 21 +++++++++++++-------- net/ipv6/sit.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/xfrm6_policy.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/sctp/input.c | 2 +- net/sctp/transport.c | 2 +- 21 files changed, 71 insertions(+), 49 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43..1ca732201f3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 085931fa7ce..d079fc61c12 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,8 +24,10 @@ struct dst_ops { struct net_device *dev, int how); struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); - void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); int (*local_out)(struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 81f76c402cf..68e8f364bbf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 683902fcc8e..ab4f44c9bb2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3ee0342e1ce..56840b249f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e9c4e2e864c..47de90d8fe9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *); static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb , u32 mtu); +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); @@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops) * We update both the mtu and the advertised mss (i.e. the segment size we * advertise to the other end). */ -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; @@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 200d2180937..3ea465286a3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,7 +840,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0c3123566d7..42c44b1403c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c2d0e6d8baa..2c2c35bace7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aad21819316..b35d3bfc66c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1273,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -1506,7 +1508,8 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rtable *rt = (struct rtable *) dst; @@ -1531,7 +1534,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); ip_rt_put(rt); } } @@ -1559,7 +1562,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, skb); + ip_do_redirect(&rt->dst, NULL, skb); ip_rt_put(rt); } } @@ -2587,11 +2590,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b8e7e059540..d9caf5c07aa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -319,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 737131cef37..fcf7678bc00 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 62539a4b2dc..4a0c4d2d8b0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -269,7 +269,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); return inet6_csk_route_socket(sk); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61d10659729..db328466796 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2a4c8d48977..31af1ed6c1d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; @@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - rt6_do_redirect(dst, skb); + rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); @@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbf1622fdee..3bd1bfc01f8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ecdf241cad0..c9dabdd832d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst,skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f5a9cb8257b..ef39812107b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } -static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 71d6ecb6592..65b616ae171 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && !skb_is_gso(skb)) { diff --git a/net/sctp/input.c b/net/sctp/input.c index a67bc31f49f..c201b26879a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, return; dst = sctp_transport_dst_check(t); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e69e1a2175a..a6b7ee9ce28 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -249,7 +249,7 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); if (dst) { - dst->ops->update_pmtu(dst, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); dst = sctp_transport_dst_check(t); if (!dst) -- cgit v1.2.3-70-g09d2 From 5abf7f7e0f6bdbfcac737f636497d7016d9507eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jul 2012 22:42:13 +0200 Subject: ipv4: fix rcu splat free_nh_exceptions() should use rcu_dereference_protected(..., 1) since its called after one RCU grace period. Also add some const-ification in recent code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/route.c | 13 +++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1e09852df51..2b57d768240 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -148,11 +148,11 @@ static void free_nh_exceptions(struct fib_nh *nh) for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; - fnhe = rcu_dereference(hash[i].chain); + fnhe = rcu_dereference_protected(hash[i].chain, 1); while (fnhe) { struct fib_nh_exception *next; - next = rcu_dereference(fnhe->fnhe_next); + next = rcu_dereference_protected(fnhe->fnhe_next, 1); kfree(fnhe); fnhe = next; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3ea465286a3..c7a4de05ca0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -806,8 +806,8 @@ EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { - struct inet_sock *inet = inet_sk(sk); - struct ip_options_rcu *inet_opt; + const struct inet_sock *inet = inet_sk(sk); + const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; struct flowi4 *fl4; struct rtable *rt; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 812e4447a22..f67e7023672 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1275,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, +static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -1294,7 +1294,8 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, iph->daddr, iph->saddr, 0, 0); } -static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk) +static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, + const struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; @@ -1305,10 +1306,10 @@ static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct s __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); } -static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) +static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - struct ip_options_rcu *inet_opt; + const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; rcu_read_lock(); @@ -1323,8 +1324,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) rcu_read_unlock(); } -static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, - struct sk_buff *skb) +static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, + const struct sk_buff *skb) { if (skb) build_skb_flow_key(fl4, skb, sk); -- cgit v1.2.3-70-g09d2 From f8126f1d5136be1ca1a3536d43ad7a710b5620f8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2012 05:03:45 -0700 Subject: ipv4: Adjust semantics of rt->rt_gateway. In order to allow prefixed routes, we have to adjust how rt_gateway is set and interpreted. The new interpretation is: 1) rt_gateway == 0, destination is on-link, nexthop is iph->daddr 2) rt_gateway != 0, destination requires a nexthop gateway Abstract the fetching of the proper nexthop value using a new inline helper, rt_nexthop(), as suggested by Joe Perches. Signed-off-by: David S. Miller Tested-by: Vijay Subramanian --- include/net/route.h | 7 +++++++ net/ipv4/arp.c | 3 +-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 5 +++-- net/ipv4/route.c | 17 +++++++++-------- 8 files changed, 25 insertions(+), 17 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/include/net/route.h b/include/net/route.h index 6d111bceb16..3c1eeab9749 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -70,6 +70,13 @@ static inline bool rt_is_output_route(const struct rtable *rt) return rt->rt_route_iif == 0; } +static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) +{ + if (rt->rt_gateway) + return rt->rt_gateway; + return daddr; +} + struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c38293f3816..a0124eb7dbe 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -475,8 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = skb_rtable(skb)->rt_gateway; - + paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr); if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c7a4de05ca0..0a290d719bc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -389,7 +389,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; @@ -422,7 +422,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 42c44b1403c..b062a98574f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -766,7 +766,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c528f841ca4..4494015f7e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2c2c35bace7..99af1f0cc65 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -487,7 +487,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_fifo_errors++; goto tx_error; } - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 2f210c79dc8..cbb6a1a6f6f 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_nat_ipv4_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; - __be32 newsrc; + __be32 newsrc, nh; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); @@ -70,7 +70,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) mr = par->targinfo; rt = skb_rtable(skb); - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + nh = rt_nexthop(rt, ip_hdr(skb)->daddr); + newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { pr_info("%s ate my IP address\n", par->out->name); return NF_DROP; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85d103fee88..d1d57963809 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,9 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else - src = inet_select_addr(rt->dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); + src = inet_select_addr(rt->dst.dev, + rt_nexthop(rt, iph->daddr), + RT_SCOPE_UNIVERSE); rcu_read_unlock(); } memcpy(addr, &src, 4); @@ -1132,7 +1133,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway != 0 && mtu > 576) + if (rt->rt_gateway && mtu > 576) mtu = 576; } @@ -1274,7 +1275,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -1392,7 +1393,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; rth->dst.input = ip_forward; @@ -1557,7 +1558,7 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1707,7 +1708,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_pmtu = 0; - rth->rt_gateway = fl4->daddr; + rth->rt_gateway = 0; rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2070,7 +2071,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (fl4->daddr != rt->rt_gateway && + if (rt->rt_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; -- cgit v1.2.3-70-g09d2 From ba3f7f04ef2b19aace38f855aedd17fe43035d50 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:02:46 -0700 Subject: ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code. Signed-off-by: David S. Miller --- include/net/flow.h | 1 - include/net/inet_connection_sock.h | 3 +-- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 5 +---- net/ipv4/route.c | 3 --- net/ipv4/tcp_ipv4.c | 4 ++-- 6 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net/ipv4/inet_connection_sock.c') diff --git a/include/net/flow.h b/include/net/flow.h index ce9cb7656b4..e1dd5082ec7 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -21,7 +21,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_CAN_SLEEP 0x02 -#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2cf44b4ed2e..5ee66f517b4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache); + const struct request_sock *req); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ab4f44c9bb2..25428d0c50c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req, false); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0a290d719bc..db0cf17c00f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache) + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - if (nocache) - flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97cca8a03d9..7e1c0ed0ef7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1836,9 +1836,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) - rth->dst.flags |= DST_NOCACHE; - return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8b75a5898..59110caeb07 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && fl4.daddr == saddr) { if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); -- cgit v1.2.3-70-g09d2