diff options
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 79 |
1 files changed, 60 insertions, 19 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 73c6b63bba7..a52f50187b5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb) skb_dst(skb)->dev, dst_output); } -int ip_local_out(struct sk_buff *skb) +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out); +EXPORT_SYMBOL_GPL(ip_local_out_sk); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static int ip_finish_output_gso(struct sk_buff *skb) +{ + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + /* common case: locally created skb or seglen is <= mtu */ + if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) + return ip_finish_output2(skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. + * + * This can happen in two cases: + * 1) TCP GRO packet, DF bit not set + * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly + * from host network stack. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = ip_fragment(segs, ip_finish_output2); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) @@ -220,15 +262,17 @@ static int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) + if (skb_is_gso(skb)) + return ip_finish_output_gso(skb); + + if (skb->len > ip_skb_dst_mtu(skb)) return ip_fragment(skb, ip_finish_output2); - else - return ip_finish_output2(skb); + + return ip_finish_output2(skb); } -int ip_mc_output(struct sk_buff *skb) +int ip_mc_output(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -287,7 +331,7 @@ int ip_mc_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sk_buff *skb) +int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; @@ -315,9 +359,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) sizeof(fl4->saddr) + sizeof(fl4->daddr)); } -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) +/* Note: skb->sk can be different from sk, in case of tunnels */ +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -389,6 +433,7 @@ packet_routed: ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -446,7 +491,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; - bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; dev = rt->dst.dev; @@ -456,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); - mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding); + mtu = ip_skb_dst_mtu(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { @@ -822,8 +866,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1146,8 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1308,8 +1350,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet->pmtudisc < IP_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip_sk_local_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame |