summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c2
-rw-r--r--include/net/dst_ops.h6
-rw-r--r--include/net/inet6_connection_sock.h2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/ip_fib.h18
-rw-r--r--include/net/sctp/sctp.h4
-rw-r--r--include/net/sctp/structs.h4
-rw-r--r--net/bridge/br_netfilter.c6
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dccp/ipv6.c37
-rw-r--r--net/decnet/dn_route.c12
-rw-r--r--net/ipv4/fib_semantics.c23
-rw-r--r--net/ipv4/inet_connection_sock.c46
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/route.c267
-rw-r--r--net/ipv4/tcp_ipv4.c13
-rw-r--r--net/ipv4/xfrm4_policy.c10
-rw-r--r--net/ipv6/inet6_connection_sock.c49
-rw-r--r--net/ipv6/ip6_tunnel.c6
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c39
-rw-r--r--net/ipv6/xfrm6_policy.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c4
-rw-r--r--net/sctp/associola.c4
-rw-r--r--net/sctp/input.c6
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/socket.c6
-rw-r--r--net/sctp/transport.c14
30 files changed, 449 insertions, 183 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 014504d8e43..1ca732201f3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 085931fa7ce..d079fc61c12 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -24,8 +24,10 @@ struct dst_ops {
struct net_device *dev, int how);
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
- void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
- void (*redirect)(struct dst_entry *dst, struct sk_buff *skb);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+ void (*redirect)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
int (*local_out)(struct sk_buff *skb);
struct neighbour * (*neigh_lookup)(const struct dst_entry *dst,
struct sk_buff *skb,
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index df2a857e853..04642c92043 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -43,4 +43,6 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
extern int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl);
+
+extern struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 291e7cee14e..2cf44b4ed2e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
+
+extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5697acefeba..e9ee1ca0708 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -18,6 +18,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
@@ -46,6 +47,22 @@ struct fib_config {
struct fib_info;
+struct fib_nh_exception {
+ struct fib_nh_exception __rcu *fnhe_next;
+ __be32 fnhe_daddr;
+ u32 fnhe_pmtu;
+ u32 fnhe_gw;
+ unsigned long fnhe_expires;
+ unsigned long fnhe_stamp;
+};
+
+struct fnhe_hash_bucket {
+ struct fib_nh_exception __rcu *chain;
+};
+
+#define FNHE_HASH_SIZE 2048
+#define FNHE_RECLAIM_DEPTH 5
+
struct fib_nh {
struct net_device *nh_dev;
struct hlist_node nh_hash;
@@ -63,6 +80,7 @@ struct fib_nh {
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
+ struct fnhe_hash_bucket *nh_exceptions;
};
/*
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 1f2735dba75..ff499640528 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -519,10 +519,10 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
return frag;
}
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
+static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc)
{
- sctp_assoc_sync_pmtu(asoc);
+ sctp_assoc_sync_pmtu(sk, asoc);
asoc->pmtu_pending = 0;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fecdf31816f..536e439ddf1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1091,7 +1091,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
void sctp_transport_burst_reset(struct sctp_transport *);
unsigned long sctp_transport_timeout(struct sctp_transport *);
void sctp_transport_reset(struct sctp_transport *);
-void sctp_transport_update_pmtu(struct sctp_transport *, u32);
+void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
void sctp_transport_immediate_rtx(struct sctp_transport *);
@@ -2003,7 +2003,7 @@ void sctp_assoc_update(struct sctp_association *old,
__u32 sctp_association_get_next_tsn(struct sctp_association *);
-void sctp_assoc_sync_pmtu(struct sctp_association *);
+void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
void sctp_assoc_set_primary(struct sctp_association *,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 81f76c402cf..68e8f364bbf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
-static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
}
-static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 129ed8f7413..ab4f44c9bb2 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
if (sk->sk_state == DCCP_LISTEN)
return;
- /* We don't check in the destentry if pmtu discovery is forbidden
- * on this route. We just assume that no packet_to_big packets
- * are send back when pmtu discovery is not active.
- * There is a small race when the user changes this flag in the
- * route, but I think that's acceptable.
- */
- if ((dst = __sk_dst_check(sk, 0)) == NULL)
+ dst = inet_csk_update_pmtu(sk, mtu);
+ if (!dst)
return;
- dst->ops->update_pmtu(dst, mtu);
-
/* Something is about to be wrong... Remember soft error
* for the case, if this connection will not able to recover.
*/
@@ -200,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
struct dst_entry *dst = __sk_dst_check(sk, 0);
if (dst)
- dst->ops->redirect(dst, skb);
+ dst->ops->redirect(dst, sk, skb);
}
/*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 090c0800ce0..56840b249f3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
if (dst)
- dst->ops->redirect(dst, skb);
+ dst->ops->redirect(dst, sk, skb);
}
if (type == ICMPV6_PKT_TOOBIG) {
@@ -145,39 +145,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
goto out;
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
- } else
- dst_hold(dst);
-
- dst->ops->update_pmtu(dst, ntohl(info));
+ dst = inet6_csk_update_pmtu(sk, ntohl(info));
+ if (!dst)
+ goto out;
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
dccp_sync_mss(sk, dst_mtu(dst));
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
goto out;
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index e9c4e2e864c..47de90d8fe9 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *);
static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb , u32 mtu);
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr);
@@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops)
* We update both the mtu and the advertised mss (i.e. the segment size we
* advertise to the other end).
*/
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct dn_route *rt = (struct dn_route *) dst;
struct neighbour *n = rt->n;
@@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
}
}
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d71bfbdc0bf..1e09852df51 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,27 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
},
};
+static void free_nh_exceptions(struct fib_nh *nh)
+{
+ struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+ int i;
+
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
+ struct fib_nh_exception *fnhe;
+
+ fnhe = rcu_dereference(hash[i].chain);
+ while (fnhe) {
+ struct fib_nh_exception *next;
+
+ next = rcu_dereference(fnhe->fnhe_next);
+ kfree(fnhe);
+
+ fnhe = next;
+ }
+ }
+ kfree(hash);
+}
+
/* Release a nexthop info record */
static void free_fib_info_rcu(struct rcu_head *head)
{
@@ -148,6 +169,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev);
+ if (nexthop_nh->nh_exceptions)
+ free_nh_exceptions(nexthop_nh);
} endfor_nexthops(fi);
release_net(fi->fib_net);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 76825be3b64..3ea465286a3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif
+
+static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt;
+ __be32 daddr = inet->inet_daddr;
+ struct flowi4 *fl4;
+ struct rtable *rt;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ fl4 = &fl->u.ip4;
+ rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
+ inet->inet_saddr, inet->inet_dport,
+ inet->inet_sport, sk->sk_protocol,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ if (IS_ERR(rt))
+ rt = NULL;
+ if (rt)
+ sk_setup_caps(sk, &rt->dst);
+ rcu_read_unlock();
+
+ return &rt->dst;
+}
+
+struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+ struct dst_entry *dst = __sk_dst_check(sk, 0);
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!dst) {
+ dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
+ if (!dst)
+ goto out;
+ }
+ dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+ dst = __sk_dst_check(sk, 0);
+ if (!dst)
+ dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
+out:
+ return dst;
+}
+EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0c3123566d7..42c44b1403c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
df |= (old_iph->frag_off&htons(IP_DF));
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c2d0e6d8baa..2c2c35bace7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if ((old_iph->frag_off & htons(IP_DF)) &&
mtu < ntohs(old_iph->tot_len)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aad21819316..a5bd0b4acc6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst);
static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
-static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
static int rt_garbage_collect(struct dst_ops *ops);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1273,14 +1275,130 @@ static void rt_del(unsigned int hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
-static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
+ const struct iphdr *iph,
+ int oif, u8 tos,
+ u8 prot, u32 mark, int flow_flags)
+{
+ if (sk) {
+ const struct inet_sock *inet = inet_sk(sk);
+
+ oif = sk->sk_bound_dev_if;
+ mark = sk->sk_mark;
+ tos = RT_CONN_FLAGS(sk);
+ prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+ }
+ flowi4_init_output(fl4, oif, mark, tos,
+ RT_SCOPE_UNIVERSE, prot,
+ flow_flags,
+ iph->daddr, iph->saddr, 0, 0);
+}
+
+static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ int oif = skb->dev->ifindex;
+ u8 tos = RT_TOS(iph->tos);
+ u8 prot = iph->protocol;
+ u32 mark = skb->mark;
+
+ __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+}
+
+static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt;
+ __be32 daddr = inet->inet_daddr;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ inet_sk_flowi_flags(sk),
+ daddr, inet->inet_saddr, 0, 0);
+ rcu_read_unlock();
+}
+
+static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (skb)
+ build_skb_flow_key(fl4, skb, sk);
+ else
+ build_sk_flow_key(fl4, sk);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
+
+static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr)
+{
+ struct fib_nh_exception *fnhe, *oldest;
+
+ oldest = rcu_dereference(hash->chain);
+ for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
+ fnhe = rcu_dereference(fnhe->fnhe_next)) {
+ if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+ oldest = fnhe;
+ }
+ return oldest;
+}
+
+static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+ struct fib_nh_exception *fnhe;
+ int depth;
+ u32 hval;
+
+ if (!hash) {
+ hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash),
+ GFP_ATOMIC);
+ if (!hash)
+ return NULL;
+ }
+
+ hval = (__force u32) daddr;
+ hval ^= (hval >> 11) ^ (hval >> 22);
+ hash += hval;
+
+ depth = 0;
+ for (fnhe = rcu_dereference(hash->chain); fnhe;
+ fnhe = rcu_dereference(fnhe->fnhe_next)) {
+ if (fnhe->fnhe_daddr == daddr)
+ goto out;
+ depth++;
+ }
+
+ if (depth > FNHE_RECLAIM_DEPTH) {
+ fnhe = fnhe_oldest(hash + hval, daddr);
+ goto out_daddr;
+ }
+ fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+ if (!fnhe)
+ return NULL;
+
+ fnhe->fnhe_next = hash->chain;
+ rcu_assign_pointer(hash->chain, fnhe);
+
+out_daddr:
+ fnhe->fnhe_daddr = daddr;
+out:
+ fnhe->fnhe_stamp = jiffies;
+ return fnhe;
+}
+
+static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4)
{
__be32 new_gw = icmp_hdr(skb)->un.gateway;
__be32 old_gw = ip_hdr(skb)->saddr;
struct net_device *dev = skb->dev;
struct in_device *in_dev;
+ struct fib_result res;
struct neighbour *n;
- struct rtable *rt;
struct net *net;
switch (icmp_hdr(skb)->code & 7) {
@@ -1294,7 +1412,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
return;
}
- rt = (struct rtable *) dst;
if (rt->rt_gateway != old_gw)
return;
@@ -1318,11 +1435,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
goto reject_redirect;
}
- n = ipv4_neigh_lookup(dst, NULL, &new_gw);
+ n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
if (n) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
+ if (fib_lookup(net, fl4, &res) == 0) {
+ struct fib_nh *nh = &FIB_RES_NH(res);
+ struct fib_nh_exception *fnhe;
+
+ spin_lock_bh(&fnhe_lock);
+ fnhe = find_or_create_fnhe(nh, fl4->daddr);
+ if (fnhe)
+ fnhe->fnhe_gw = new_gw;
+ spin_unlock_bh(&fnhe_lock);
+ }
rt->rt_gateway = new_gw;
rt->rt_flags |= RTCF_REDIRECTED;
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
@@ -1347,6 +1474,17 @@ reject_redirect:
;
}
+static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
+{
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ rt = (struct rtable *) dst;
+
+ ip_rt_build_flow_key(&fl4, sk, skb);
+ __ip_do_redirect(rt, skb, &fl4);
+}
+
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *)dst;
@@ -1506,32 +1644,51 @@ out: kfree_skb(skb);
return 0;
}
-static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
- struct rtable *rt = (struct rtable *) dst;
-
- dst_confirm(dst);
+ struct fib_result res;
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
+ if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
+ struct fib_nh *nh = &FIB_RES_NH(res);
+ struct fib_nh_exception *fnhe;
+
+ spin_lock_bh(&fnhe_lock);
+ fnhe = find_or_create_fnhe(nh, fl4->daddr);
+ if (fnhe) {
+ fnhe->fnhe_pmtu = mtu;
+ fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires;
+ }
+ spin_unlock_bh(&fnhe_lock);
+ }
rt->rt_pmtu = mtu;
dst_set_expires(&rt->dst, ip_rt_mtu_expires);
}
+static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+ struct rtable *rt = (struct rtable *) dst;
+ struct flowi4 fl4;
+
+ ip_rt_build_flow_key(&fl4, sk, skb);
+ __ip_rt_update_pmtu(rt, &fl4, mtu);
+}
+
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
int oif, u32 mark, u8 protocol, int flow_flags)
{
- const struct iphdr *iph = (const struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
- protocol, flow_flags,
- iph->daddr, iph->saddr, 0, 0);
+ __build_flow_key(&fl4, NULL, iph, oif,
+ RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
- ip_rt_update_pmtu(&rt->dst, mtu);
+ __ip_rt_update_pmtu(rt, &fl4, mtu);
ip_rt_put(rt);
}
}
@@ -1539,27 +1696,31 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
- const struct inet_sock *inet = inet_sk(sk);
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct flowi4 fl4;
+ struct rtable *rt;
- return ipv4_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
- inet_sk_flowi_flags(sk));
+ __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ rt = __ip_route_output_key(sock_net(sk), &fl4);
+ if (!IS_ERR(rt)) {
+ __ip_rt_update_pmtu(rt, &fl4, mtu);
+ ip_rt_put(rt);
+ }
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
int oif, u32 mark, u8 protocol, int flow_flags)
{
- const struct iphdr *iph = (const struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
- protocol, flow_flags, iph->daddr, iph->saddr, 0, 0);
+ __build_flow_key(&fl4, NULL, iph, oif,
+ RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
- ip_do_redirect(&rt->dst, skb);
+ __ip_do_redirect(rt, skb, &fl4);
ip_rt_put(rt);
}
}
@@ -1567,12 +1728,16 @@ EXPORT_SYMBOL_GPL(ipv4_redirect);
void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- const struct inet_sock *inet = inet_sk(sk);
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct flowi4 fl4;
+ struct rtable *rt;
- return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
- sk->sk_mark,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
- inet_sk_flowi_flags(sk));
+ __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+ rt = __ip_route_output_key(sock_net(sk), &fl4);
+ if (!IS_ERR(rt)) {
+ __ip_do_redirect(rt, skb, &fl4);
+ ip_rt_put(rt);
+ }
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
@@ -1719,14 +1884,46 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
+static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+ struct fib_nh_exception *fnhe;
+ u32 hval;
+
+ hval = (__force u32) daddr;
+ hval ^= (hval >> 11) ^ (hval >> 22);
+
+ for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
+ fnhe = rcu_dereference(fnhe->fnhe_next)) {
+ if (fnhe->fnhe_daddr == daddr) {
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = jiffies - expires;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
+ }
+ if (fnhe->fnhe_gw)
+ rt->rt_gateway = fnhe->fnhe_gw;
+ fnhe->fnhe_stamp = jiffies;
+ break;
+ }
+ }
+}
+
static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
const struct fib_result *res,
struct fib_info *fi, u16 type, u32 itag)
{
if (fi) {
- if (FIB_RES_GW(*res) &&
- FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
- rt->rt_gateway = FIB_RES_GW(*res);
+ struct fib_nh *nh = &FIB_RES_NH(*res);
+
+ if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ rt->rt_gateway = nh->nh_gw;
+ if (unlikely(nh->nh_exceptions))
+ rt_bind_exception(rt, nh, fl4->daddr);
rt_init_metrics(rt, fl4, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
@@ -2587,11 +2784,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
return mtu ? : dst->dev->mtu;
}
-static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
}
-static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7a0062cb4ed..d9caf5c07aa 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
if (sk->sk_state == TCP_LISTEN)
return;
- /* We don't check in the destentry if pmtu discovery is forbidden
- * on this route. We just assume that no packet_to_big packets
- * are send back when pmtu discovery is not active.
- * There is a small race when the user changes this flag in the
- * route, but I think that's acceptable.
- */
- if ((dst = __sk_dst_check(sk, 0)) == NULL)
+ dst = inet_csk_update_pmtu(sk, mtu);
+ if (!dst)
return;
- dst->ops->update_pmtu(dst, mtu);
-
/* Something is about to be wrong... Remember soft error
* for the case, if this connection will not able to recover.
*/
@@ -326,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
struct dst_entry *dst = __sk_dst_check(sk, 0);
if (dst)
- dst->ops->redirect(dst, skb);
+ dst->ops->redirect(dst, sk, skb);
}
/*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 737131cef37..fcf7678bc00 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
}
-static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu);
}
-static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->redirect(path, skb);
+ path->ops->redirect(path, sk, skb);
}
static void xfrm4_dst_destroy(struct dst_entry *dst)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bceb14450a1..4a0c4d2d8b0 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
- struct dst_entry *dst;
struct in6_addr *final_p, final;
- int res;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
@@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
final_p = fl6_update_dst(&fl6, np->opt, &final);
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
+ if (!dst) {
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- sk->sk_route_caps = 0;
- kfree_skb(skb);
- return PTR_ERR(dst);
- }
+ if (!IS_ERR(dst))
+ __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ }
+ return dst;
+}
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+{
+ struct sock *sk = skb->sk;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int res;
+
+ dst = inet6_csk_route_socket(sk);
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
}
rcu_read_lock();
@@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
return res;
}
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+ struct dst_entry *dst = inet6_csk_route_socket(sk);
+
+ if (IS_ERR(dst))
+ return NULL;
+ dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+ return inet6_csk_route_socket(sk);
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61d10659729..db328466796 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
}
if (rel_type == ICMP_REDIRECT)
- skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2);
+ skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
*pmtu = mtu;
err = -EMSGSIZE;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 412fad809a3..84f6564dd37 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu);
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
return mtu ? : dst->dev->mtu;
}
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
}
-static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
}
@@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb)
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
@@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- ip6_rt_update_pmtu(dst, ntohl(mtu));
+ ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- rt6_do_redirect(dst, skb);
+ rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg)
return err;
}
-static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
struct netevent_redirect netevent;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fbf1622fdee..3bd1bfc01f8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3071f377145..c9dabdd832d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
if (dst)
- dst->ops->redirect(dst,skb);
+ dst->ops->redirect(dst, sk, skb);
}
if (type == ICMPV6_PKT_TOOBIG) {
@@ -378,43 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
goto out;
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
-
- } else
- dst_hold(dst);
-
- dst->ops->update_pmtu(dst, ntohl(info));
+ dst = inet6_csk_update_pmtu(sk, ntohl(info));
+ if (!dst)
+ goto out;
if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
tcp_sync_mss(sk, dst_mtu(dst));
tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ }
goto out;
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f5a9cb8257b..ef39812107b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu);
}
-static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->redirect(path, skb);
+ path->ops->redirect(path, sk, skb);
}
static void xfrm6_dst_destroy(struct dst_entry *dst)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 71d6ecb6592..65b616ae171 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
}
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
df |= (old_iph->frag_off & htons(IP_DF));
@@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
}
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
!skb_is_gso(skb)) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b16517ee1aa..8cf348e62e7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1360,7 +1360,7 @@ struct sctp_transport *sctp_assoc_choose_alter_transport(
/* Update the association's pmtu and frag_point by going through all the
* transports. This routine is called when a transport's PMTU has changed.
*/
-void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
+void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
{
struct sctp_transport *t;
__u32 pmtu = 0;
@@ -1372,7 +1372,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
list_for_each_entry(t, &asoc->peer.transport_addr_list,
transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(t, dst_mtu(t->dst));
+ sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f050d45faa9..c201b26879a 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -408,10 +408,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
if (t->param_flags & SPP_PMTUD_ENABLE) {
/* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
+ sctp_transport_update_pmtu(sk, t, pmtu);
/* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
+ sctp_assoc_sync_pmtu(sk, asoc);
}
/* Retransmit with the new pmtu setting.
@@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
return;
dst = sctp_transport_dst_check(t);
if (dst)
- dst->ops->redirect(dst, skb);
+ dst->ops->redirect(dst, sk, skb);
}
/*
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 539f35d07f4..838e18b4d7e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -410,7 +410,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
- sctp_assoc_sync_pmtu(asoc);
+ sctp_assoc_sync_pmtu(sk, asoc);
}
}
dst = dst_clone(tp->dst);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b3b8a8d813e..74bd3c47350 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1853,7 +1853,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ sctp_assoc_pending_pmtu(sk, asoc);
/* If fragmentation is disabled and the message length exceeds the
* association fragmentation point, return EMSGSIZE. The I-D
@@ -2365,7 +2365,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
if (trans) {
trans->pathmtu = params->spp_pathmtu;
- sctp_assoc_sync_pmtu(asoc);
+ sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
} else if (asoc) {
asoc->pathmtu = params->spp_pathmtu;
sctp_frag_point(asoc, params->spp_pathmtu);
@@ -2382,7 +2382,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
if (update) {
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
- sctp_assoc_sync_pmtu(asoc);
+ sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
}
} else if (asoc) {
asoc->param_flags =
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1dcceb6e0ce..a6b7ee9ce28 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -228,7 +228,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst;
@@ -245,8 +245,16 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
}
dst = sctp_transport_dst_check(t);
- if (dst)
- dst->ops->update_pmtu(dst, pmtu);
+ if (!dst)
+ t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
+
+ if (dst) {
+ dst->ops->update_pmtu(dst, sk, NULL, pmtu);
+
+ dst = sctp_transport_dst_check(t);
+ if (!dst)
+ t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
+ }
}
/* Caches the dst entry and source address for a transport's destination