diff options
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 461 |
1 files changed, 309 insertions, 152 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da0c97f2fab..016694d6248 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -36,6 +36,8 @@ * The functions in this file will not compile correctly with gcc 2.4.x */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -66,12 +68,10 @@ #include <net/xfrm.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <trace/events/skb.h> +#include <linux/highmem.h> -#include "kmap_skb.h" - -static struct kmem_cache *skbuff_head_cache __read_mostly; +struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; static void sock_pipe_buf_release(struct pipe_inode_info *pipe, @@ -120,11 +120,10 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { */ static void skb_over_panic(struct sk_buff *skb, int sz, void *here) { - printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); + pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", + __func__, here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, (unsigned long)skb->end, + skb->dev ? skb->dev->name : "<NULL>"); BUG(); } @@ -139,11 +138,10 @@ static void skb_over_panic(struct sk_buff *skb, int sz, void *here) static void skb_under_panic(struct sk_buff *skb, int sz, void *here) { - printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); + pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", + __func__, here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, (unsigned long)skb->end, + skb->dev ? skb->dev->name : "<NULL>"); BUG(); } @@ -247,6 +245,7 @@ EXPORT_SYMBOL(__alloc_skb); /** * build_skb - build a network buffer * @data: data buffer provided by caller + * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() @@ -260,20 +259,21 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data) +struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; - unsigned int size; + unsigned int size = frag_size ? : ksize(data); skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); if (!skb) return NULL; - size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); + skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -293,6 +293,46 @@ struct sk_buff *build_skb(void *data) } EXPORT_SYMBOL(build_skb); +struct netdev_alloc_cache { + struct page *page; + unsigned int offset; +}; +static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + struct netdev_alloc_cache *nc; + void *data = NULL; + unsigned long flags; + + local_irq_save(flags); + nc = &__get_cpu_var(netdev_alloc_cache); + if (unlikely(!nc->page)) { +refill: + nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); + nc->offset = 0; + } + if (likely(nc->page)) { + if (nc->offset + fragsz > PAGE_SIZE) { + put_page(nc->page); + goto refill; + } + data = page_address(nc->page) + nc->offset; + nc->offset += fragsz; + get_page(nc->page); + } + local_irq_restore(flags); + return data; +} +EXPORT_SYMBOL(netdev_alloc_frag); + /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on @@ -307,11 +347,23 @@ EXPORT_SYMBOL(build_skb); * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) + unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; + unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) { + void *data = netdev_alloc_frag(fragsz); - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + if (likely(data)) { + skb = build_skb(data, fragsz); + if (unlikely(!skb)) + put_page(virt_to_head_page(data)); + } + } else { + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; @@ -321,37 +373,15 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, EXPORT_SYMBOL(__netdev_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size) + int size, unsigned int truesize) { skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; - skb->truesize += size; + skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag); -/** - * dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ -struct sk_buff *dev_alloc_skb(unsigned int length) -{ - /* - * There is more code here than it seems: - * __dev_alloc_skb is an inline - */ - return __dev_alloc_skb(length, GFP_ATOMIC); -} -EXPORT_SYMBOL(dev_alloc_skb); - static void skb_drop_list(struct sk_buff **listp) { struct sk_buff *list = *listp; @@ -378,6 +408,14 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static void skb_free_head(struct sk_buff *skb) +{ + if (skb->head_frag) + put_page(virt_to_head_page(skb->head)); + else + kfree(skb->head); +} + static void skb_release_data(struct sk_buff *skb) { if (!skb->cloned || @@ -404,7 +442,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); - kfree(skb->head); + skb_free_head(skb); } } @@ -592,6 +630,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; + new->no_fcs = old->no_fcs; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -645,6 +684,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(tail); C(end); C(head); + C(head_frag); C(data); C(truesize); atomic_set(&n->users, 1); @@ -707,10 +747,10 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } return -ENOMEM; } - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + vaddr = kmap_atomic(skb_frag_page(f)); memcpy(page_address(page), vaddr + f->page_offset, skb_frag_size(f)); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); page->private = (unsigned long)head; head = page; } @@ -819,7 +859,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); - unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len; + unsigned int size = skb_end_offset(skb) + skb->data_len; struct sk_buff *n = alloc_skb(size, gfp_mask); if (!n) @@ -920,9 +960,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, { int i; u8 *data; - int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail; + int size = nhead + skb_end_offset(skb) + ntail; long off; - bool fastpath; BUG_ON(nhead < 0); @@ -931,30 +970,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - /* Check if we can avoid taking references on fragments if we own - * the last reference on skb->head. (see skb_release_data()) - */ - if (!skb->cloned) - fastpath = true; - else { - int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; - } - - if (fastpath && - size + sizeof(struct skb_shared_info) <= ksize(skb->head)) { - memmove(skb->head + size, skb_shinfo(skb), - offsetof(struct skb_shared_info, - frags[skb_shinfo(skb)->nr_frags])); - memmove(skb->head + nhead, skb->head, - skb_tail_pointer(skb) - skb->head); - off = nhead; - goto adjust_others; - } - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask); if (!data) goto nodata; + size = SKB_WITH_OVERHEAD(ksize(data)); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. @@ -965,9 +985,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); - if (fastpath) { - kfree(skb->head); - } else { + /* + * if shinfo is shared we must drop the old head gracefully, but if it + * is not we can just drop the old head and let the existing refcount + * be since all we did is relocate the values + */ + if (skb_cloned(skb)) { /* copy this zero copy skb frags */ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) @@ -980,11 +1003,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_clone_fraglist(skb); skb_release_data(skb); + } else { + skb_free_head(skb); } off = (data + nhead) - skb->head; skb->head = data; -adjust_others: + skb->head_frag = 0; skb->data += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->end = size; @@ -1273,7 +1298,7 @@ drop_pages: return -ENOMEM; nfrag->next = frag->next; - kfree_skb(frag); + consume_skb(frag); frag = nfrag; *fragp = frag; } @@ -1485,21 +1510,22 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); + end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + vaddr = kmap_atomic(skb_frag_page(f)); memcpy(to, - vaddr + skb_shinfo(skb)->frags[i].page_offset+ - offset - start, copy); - kunmap_skb_frag(vaddr); + vaddr + f->page_offset + offset - start, + copy); + kunmap_atomic(vaddr); if ((len -= copy) == 0) return 0; @@ -1545,9 +1571,9 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) put_page(spd->pages[i]); } -static inline struct page *linear_to_page(struct page *page, unsigned int *len, - unsigned int *offset, - struct sk_buff *skb, struct sock *sk) +static struct page *linear_to_page(struct page *page, unsigned int *len, + unsigned int *offset, + struct sk_buff *skb, struct sock *sk) { struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1563,6 +1589,9 @@ new_page: } else { unsigned int mlen; + /* If we are the only user of the page, we can reset offset */ + if (page_count(p) == 1) + sk->sk_sndmsg_off = 0; off = sk->sk_sndmsg_off; mlen = PAGE_SIZE - off; if (mlen < 64 && mlen < *len) { @@ -1576,36 +1605,48 @@ new_page: memcpy(page_address(p) + off, page_address(page) + *offset, *len); sk->sk_sndmsg_off += *len; *offset = off; - get_page(p); return p; } +static bool spd_can_coalesce(const struct splice_pipe_desc *spd, + struct page *page, + unsigned int offset) +{ + return spd->nr_pages && + spd->pages[spd->nr_pages - 1] == page && + (spd->partial[spd->nr_pages - 1].offset + + spd->partial[spd->nr_pages - 1].len == offset); +} + /* * Fill page/offset/length into spd, if it can hold more pages. */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, - struct pipe_inode_info *pipe, struct page *page, - unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear, - struct sock *sk) +static bool spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, + unsigned int *len, unsigned int offset, + struct sk_buff *skb, bool linear, + struct sock *sk) { - if (unlikely(spd->nr_pages == pipe->buffers)) - return 1; + if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) + return true; if (linear) { page = linear_to_page(page, len, &offset, skb, sk); if (!page) - return 1; - } else - get_page(page); - + return true; + } + if (spd_can_coalesce(spd, page, offset)) { + spd->partial[spd->nr_pages - 1].len += *len; + return false; + } + get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; - return 0; + return false; } static inline void __segment_seek(struct page **page, unsigned int *poff, @@ -1622,20 +1663,20 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, *plen -= off; } -static inline int __splice_segment(struct page *page, unsigned int poff, - unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear, - struct sock *sk, - struct pipe_inode_info *pipe) +static bool __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd, bool linear, + struct sock *sk, + struct pipe_inode_info *pipe) { if (!*len) - return 1; + return true; /* skip this segment if already processed */ if (*off >= plen) { *off -= plen; - return 0; + return false; } /* ignore any bits we already processed */ @@ -1651,34 +1692,38 @@ static inline int __splice_segment(struct page *page, unsigned int poff, flen = min_t(unsigned int, flen, PAGE_SIZE - poff); if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return 1; + return true; __segment_seek(&page, &poff, &plen, flen); *len -= flen; } while (*len && plen); - return 0; + return false; } /* - * Map linear and fragment data from the skb to spd. It reports failure if the + * Map linear and fragment data from the skb to spd. It reports true if the * pipe is full or if we already spliced the requested length. */ -static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, - unsigned int *offset, unsigned int *len, - struct splice_pipe_desc *spd, struct sock *sk) +static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) { int seg; - /* - * map the linear part + /* map the linear part : + * If skb->head_frag is set, this 'linear' part is backed by a + * fragment, and if the head is not shared with any clones then + * we can avoid a copy since we own the head portion of this page. */ if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1, sk, pipe)) - return 1; + offset, len, skb, spd, + skb_head_is_locked(skb), + sk, pipe)) + return true; /* * then map the fragments @@ -1688,11 +1733,11 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), f->page_offset, skb_frag_size(f), - offset, len, skb, spd, 0, sk, pipe)) - return 1; + offset, len, skb, spd, false, sk, pipe)) + return true; } - return 0; + return false; } /* @@ -1705,8 +1750,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[MAX_SKB_FRAGS]; + struct page *pages[MAX_SKB_FRAGS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1718,9 +1763,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sock *sk = skb->sk; int ret = 0; - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. @@ -1756,7 +1798,6 @@ done: lock_sock(sk); } - splice_shrink_spd(pipe, &spd); return ret; } @@ -1804,10 +1845,10 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); memcpy(vaddr + frag->page_offset + offset - start, from, copy); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); if ((len -= copy) == 0) return 0; @@ -1867,21 +1908,21 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); csum2 = csum_partial(vaddr + frag->page_offset + offset - start, copy, 0); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); if (!(len -= copy)) return csum; @@ -1953,12 +1994,12 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + offset - start, to, copy, 0); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); if (!(len -= copy)) return csum; @@ -2478,7 +2519,7 @@ next_skb: if (abs_offset < block_limit) { if (!st->frag_data) - st->frag_data = kmap_skb_frag(frag); + st->frag_data = kmap_atomic(skb_frag_page(frag)); *data = (u8 *) st->frag_data + frag->page_offset + (abs_offset - st->stepped_offset); @@ -2487,7 +2528,7 @@ next_skb: } if (st->frag_data) { - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); st->frag_data = NULL; } @@ -2496,7 +2537,7 @@ next_skb: } if (st->frag_data) { - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); st->frag_data = NULL; } @@ -2524,7 +2565,7 @@ EXPORT_SYMBOL(skb_seq_read); void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); @@ -2716,14 +2757,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (unlikely(!nskb)) goto err; - hsize = skb_end_pointer(nskb) - nskb->head; + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } - nskb->truesize += skb_end_pointer(nskb) - nskb->head - - hsize; + nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { @@ -2841,6 +2881,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int len = skb_gro_len(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); + unsigned int delta_truesize; if (p->len + len >= 65536) return -E2BIG; @@ -2870,11 +2911,41 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) frag->page_offset += offset; skb_frag_size_sub(frag, offset); + /* all fragments truesize : remove (head size + sk_buff) */ + delta_truesize = skb->truesize - + SKB_TRUESIZE(skb_end_offset(skb)); + skb->truesize -= skb->data_len; skb->len -= skb->data_len; skb->data_len = 0; - NAPI_GRO_CB(skb)->free = 1; + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; + goto done; + } else if (skb->head_frag) { + int nr_frags = pinfo->nr_frags; + skb_frag_t *frag = pinfo->frags + nr_frags; + struct page *page = virt_to_head_page(skb->head); + unsigned int first_size = headlen - offset; + unsigned int first_offset; + + if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) + return -E2BIG; + + first_offset = skb->data - + (unsigned char *)page_address(page) + + offset; + + pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; + + frag->page.p = page; + frag->page_offset = first_offset; + skb_frag_size_set(frag, first_size); + + memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); + /* We dont need to clear skbinfo->nr_frags here */ + + delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } else if (skb_gro_len(p) != pinfo->gso_size) return -E2BIG; @@ -2906,7 +2977,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) nskb->prev = p; nskb->data_len += p->len; - nskb->truesize += p->len; + nskb->truesize += p->truesize; nskb->len += p->len; *head = nskb; @@ -2916,6 +2987,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: + delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; @@ -2935,7 +3007,7 @@ merge: done: NAPI_GRO_CB(p)->count++; p->data_len += len; - p->truesize += len; + p->truesize += delta_truesize; p->len += len; NAPI_GRO_CB(skb)->same_flow = 1; @@ -3160,8 +3232,10 @@ static void sock_rmem_free(struct sk_buff *skb) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { + int len = skb->len; + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) + (unsigned int)sk->sk_rcvbuf) return -ENOMEM; skb_orphan(skb); @@ -3174,7 +3248,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, len); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); @@ -3255,10 +3329,8 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { if (unlikely(start > skb_headlen(skb)) || unlikely((int)start + off > skb_headlen(skb) - 2)) { - if (net_ratelimit()) - printk(KERN_WARNING - "bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", + start, off, skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; @@ -3270,8 +3342,93 @@ EXPORT_SYMBOL_GPL(skb_partial_csum_set); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { - if (net_ratelimit()) - pr_warning("%s: received packets cannot be forwarded" - " while LRO is enabled\n", skb->dev->name); + net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", + skb->dev->name); } EXPORT_SYMBOL(__skb_warn_lro_forwarding); + +void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) +{ + if (head_stolen) + kmem_cache_free(skbuff_head_cache, skb); + else + __kfree_skb(skb); +} +EXPORT_SYMBOL(kfree_skb_partial); + +/** + * skb_try_coalesce - try to merge skb to prior one + * @to: prior buffer + * @from: buffer to add + * @fragstolen: pointer to boolean + * + */ +bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, + bool *fragstolen, int *delta_truesize) +{ + int i, delta, len = from->len; + + *fragstolen = false; + + if (skb_cloned(to)) + return false; + + if (len <= skb_tailroom(to)) { + BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); + *delta_truesize = 0; + return true; + } + + if (skb_has_frag_list(to) || skb_has_frag_list(from)) + return false; + + if (skb_headlen(from) != 0) { + struct page *page; + unsigned int offset; + + if (skb_shinfo(to)->nr_frags + + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + return false; + + if (skb_head_is_locked(from)) + return false; + + delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + + skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + page, offset, skb_headlen(from)); + *fragstolen = true; + } else { + if (skb_shinfo(to)->nr_frags + + skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + return false; + + delta = from->truesize - + SKB_TRUESIZE(skb_end_pointer(from) - from->head); + } + + WARN_ON_ONCE(delta < len); + + memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, + skb_shinfo(from)->frags, + skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); + skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + + if (!skb_cloned(from)) + skb_shinfo(from)->nr_frags = 0; + + /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) + skb_frag_ref(from, i); + + to->truesize += delta; + to->len += len; + to->data_len += len; + + *delta_truesize = delta; + return true; +} +EXPORT_SYMBOL(skb_try_coalesce); |