From 86911732d3996a9da07914b280621450111bb6da Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 14:19:50 +0000 Subject: gro: Avoid copying headers of unmerged packets Unfortunately simplicity isn't always the best. The fraginfo interface turned out to be suboptimal. The problem was quite obvious. For every packet, we have to copy the headers from the frags structure into skb->head, even though for 99% of the packets this part is immediately thrown away after the merge. LRO didn't have this problem because it directly read the headers from the frags structure. This patch attempts to address this by creating an interface that allows GRO to access the headers in the first frag without having to copy it. Because all drivers that use frags place the headers in the first frag this optimisation should be enough. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cd23ae15a1d..df406dcf748 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; } +static inline void *skb_gro_mac_header(struct sk_buff *skb) +{ + return skb_headlen(skb) ? skb_mac_header(skb) : + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; +} + /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + offset; +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; + void *mac; if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); + mac = skb_gro_mac_header(skb); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; @@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) continue; if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) + memcmp(skb_mac_header(p), mac, mac_len)) NAPI_GRO_CB(p)->same_flow = 0; } @@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) goto normal; - } NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; ret = GRO_HELD; @@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish); int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_gro_reset_offset(skb); + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; napi->skb = NULL; @@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2544,10 +2581,21 @@ EXPORT_SYMBOL(napi_fraginfo_skb); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { int err = NET_RX_SUCCESS; + int may; switch (ret) { case GRO_NORMAL: - return netif_receive_skb(skb); + case GRO_HELD: + may = pskb_may_pull(skb, skb_gro_offset(skb)); + BUG_ON(!may); + + skb->protocol = eth_type_trans(skb, napi->dev); + + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); + + skb_gro_pull(skb, -ETH_HLEN); + break; case GRO_DROP: err = NET_RX_DROP; -- cgit v1.2.3-70-g09d2