summaryrefslogtreecommitdiffstats
path: root/net/core/skbuff.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--net/core/skbuff.c495
1 files changed, 370 insertions, 125 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 224506a6fa8..c16615bfb61 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -62,6 +62,7 @@
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
+#include <linux/if_vlan.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -256,16 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ struct sk_buff_fclones *fclones;
- kmemcheck_annotate_bitfield(child, flags1);
- kmemcheck_annotate_bitfield(child, flags2);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
- atomic_set(fclone_ref, 1);
+ atomic_set(&fclones->fclone_ref, 1);
- child->fclone = SKB_FCLONE_UNAVAILABLE;
- child->pfmemalloc = pfmemalloc;
+ fclones->skb2.fclone = SKB_FCLONE_FREE;
+ fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -344,7 +345,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
unsigned long flags;
local_irq_save(flags);
- nc = &__get_cpu_var(netdev_alloc_cache);
+ nc = this_cpu_ptr(&netdev_alloc_cache);
if (unlikely(!nc->frag.page)) {
refill:
for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
@@ -359,18 +360,29 @@ refill:
goto end;
}
nc->frag.size = PAGE_SIZE << order;
-recycle:
- atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+ /* Even if we own the page, we do not use atomic_set().
+ * This would break get_page_unless_zero() users.
+ */
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+ &nc->frag.page->_count);
nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
nc->frag.offset = 0;
}
if (nc->frag.offset + fragsz > nc->frag.size) {
- /* avoid unnecessary locked operations if possible */
- if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
- atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
- goto recycle;
- goto refill;
+ if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+ if (!atomic_sub_and_test(nc->pagecnt_bias,
+ &nc->frag.page->_count))
+ goto refill;
+ /* OK, page count is 0, we can safely set it */
+ atomic_set(&nc->frag.page->_count,
+ NETDEV_PAGECNT_MAX_BIAS);
+ } else {
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+ &nc->frag.page->_count);
+ }
+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+ nc->frag.offset = 0;
}
data = page_address(nc->frag.page) + nc->frag.offset;
@@ -490,32 +502,33 @@ static void skb_free_head(struct sk_buff *skb)
static void skb_release_data(struct sk_buff *skb)
{
- if (!skb->cloned ||
- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &skb_shinfo(skb)->dataref)) {
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- skb_frag_unref(skb, i);
- }
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int i;
- /*
- * If skb buf is from userspace, we need to notify the caller
- * the lower device DMA has done;
- */
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
+ if (skb->cloned &&
+ atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+ &shinfo->dataref))
+ return;
- uarg = skb_shinfo(skb)->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, true);
- }
+ for (i = 0; i < shinfo->nr_frags; i++)
+ __skb_frag_unref(&shinfo->frags[i]);
- if (skb_has_frag_list(skb))
- skb_drop_fraglist(skb);
+ /*
+ * If skb buf is from userspace, we need to notify the caller
+ * the lower device DMA has done;
+ */
+ if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
- skb_free_head(skb);
+ uarg = shinfo->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg, true);
}
+
+ if (shinfo->frag_list)
+ kfree_skb_list(shinfo->frag_list);
+
+ skb_free_head(skb);
}
/*
@@ -523,8 +536,7 @@ static void skb_release_data(struct sk_buff *skb)
*/
static void kfree_skbmem(struct sk_buff *skb)
{
- struct sk_buff *other;
- atomic_t *fclone_ref;
+ struct sk_buff_fclones *fclones;
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
@@ -532,22 +544,28 @@ static void kfree_skbmem(struct sk_buff *skb)
break;
case SKB_FCLONE_ORIG:
- fclone_ref = (atomic_t *) (skb + 2);
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, skb);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+ if (atomic_dec_and_test(&fclones->fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, fclones);
break;
case SKB_FCLONE_CLONE:
- fclone_ref = (atomic_t *) (skb + 1);
- other = skb - 1;
+ fclones = container_of(skb, struct sk_buff_fclones, skb2);
- /* The clone portion is available for
- * fast-cloning again.
+ /* Warning : We must perform the atomic_dec_and_test() before
+ * setting skb->fclone back to SKB_FCLONE_FREE, otherwise
+ * skb_clone() could set clone_ref to 2 before our decrement.
+ * Anyway, if we are going to free the structure, no need to
+ * rewrite skb->fclone.
*/
- skb->fclone = SKB_FCLONE_UNAVAILABLE;
-
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, other);
+ if (atomic_dec_and_test(&fclones->fclone_ref)) {
+ kmem_cache_free(skbuff_fclone_cache, fclones);
+ } else {
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_FREE;
+ }
break;
}
}
@@ -565,7 +583,7 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
/* XXX: IS this still necessary? - JHS */
@@ -673,57 +691,61 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);
+/* Make sure a field is enclosed inside headers_start/headers_end section */
+#define CHECK_SKB_FIELD(field) \
+ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
+ offsetof(struct sk_buff, headers_start)); \
+ BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
+ offsetof(struct sk_buff, headers_end)); \
+
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
new->tstamp = old->tstamp;
+ /* We do not copy old->sk */
new->dev = old->dev;
- new->transport_header = old->transport_header;
- new->network_header = old->network_header;
- new->mac_header = old->mac_header;
- new->inner_protocol = old->inner_protocol;
- new->inner_transport_header = old->inner_transport_header;
- new->inner_network_header = old->inner_network_header;
- new->inner_mac_header = old->inner_mac_header;
+ memcpy(new->cb, old->cb, sizeof(old->cb));
skb_dst_copy(new, old);
- skb_copy_hash(new, old);
- new->ooo_okay = old->ooo_okay;
- new->no_fcs = old->no_fcs;
- new->encapsulation = old->encapsulation;
- new->encap_hdr_csum = old->encap_hdr_csum;
- new->csum_valid = old->csum_valid;
- new->csum_complete_sw = old->csum_complete_sw;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
- memcpy(new->cb, old->cb, sizeof(old->cb));
- new->csum = old->csum;
- new->ignore_df = old->ignore_df;
- new->pkt_type = old->pkt_type;
- new->ip_summed = old->ip_summed;
- skb_copy_queue_mapping(new, old);
- new->priority = old->priority;
-#if IS_ENABLED(CONFIG_IP_VS)
- new->ipvs_property = old->ipvs_property;
+ __nf_copy(new, old, false);
+
+ /* Note : this field could be in headers_start/headers_end section
+ * It is not yet because we do not want to have a 16 bit hole
+ */
+ new->queue_mapping = old->queue_mapping;
+
+ memcpy(&new->headers_start, &old->headers_start,
+ offsetof(struct sk_buff, headers_end) -
+ offsetof(struct sk_buff, headers_start));
+ CHECK_SKB_FIELD(protocol);
+ CHECK_SKB_FIELD(csum);
+ CHECK_SKB_FIELD(hash);
+ CHECK_SKB_FIELD(priority);
+ CHECK_SKB_FIELD(skb_iif);
+ CHECK_SKB_FIELD(vlan_proto);
+ CHECK_SKB_FIELD(vlan_tci);
+ CHECK_SKB_FIELD(transport_header);
+ CHECK_SKB_FIELD(network_header);
+ CHECK_SKB_FIELD(mac_header);
+ CHECK_SKB_FIELD(inner_protocol);
+ CHECK_SKB_FIELD(inner_transport_header);
+ CHECK_SKB_FIELD(inner_network_header);
+ CHECK_SKB_FIELD(inner_mac_header);
+ CHECK_SKB_FIELD(mark);
+#ifdef CONFIG_NETWORK_SECMARK
+ CHECK_SKB_FIELD(secmark);
+#endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ CHECK_SKB_FIELD(napi_id);
#endif
- new->pfmemalloc = old->pfmemalloc;
- new->protocol = old->protocol;
- new->mark = old->mark;
- new->skb_iif = old->skb_iif;
- __nf_copy(new, old);
#ifdef CONFIG_NET_SCHED
- new->tc_index = old->tc_index;
+ CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
- new->tc_verd = old->tc_verd;
+ CHECK_SKB_FIELD(tc_verd);
#endif
#endif
- new->vlan_proto = old->vlan_proto;
- new->vlan_tci = old->vlan_tci;
-
- skb_copy_secmark(new, old);
-#ifdef CONFIG_NET_RX_BUSY_POLL
- new->napi_id = old->napi_id;
-#endif
}
/*
@@ -854,17 +876,22 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
- struct sk_buff *n;
+ struct sk_buff_fclones *fclones = container_of(skb,
+ struct sk_buff_fclones,
+ skb1);
+ struct sk_buff *n = &fclones->skb2;
if (skb_orphan_frags(skb, gfp_mask))
return NULL;
- n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
- n->fclone == SKB_FCLONE_UNAVAILABLE) {
- atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone == SKB_FCLONE_FREE) {
n->fclone = SKB_FCLONE_CLONE;
- atomic_inc(fclone_ref);
+ /* As our fastclone was free, clone_ref must be 1 at this point.
+ * We could use atomic_inc() here, but it is faster
+ * to set the final value.
+ */
+ atomic_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -874,7 +901,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return NULL;
kmemcheck_annotate_bitfield(n, flags1);
- kmemcheck_annotate_bitfield(n, flags2);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
@@ -2646,7 +2672,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
* skb_seq_read() will return the remaining part of the block.
*
* Note 1: The size of each block of data returned can be arbitrary,
- * this limitation is the cost for zerocopy seqeuental
+ * this limitation is the cost for zerocopy sequential
* reads of potentially non linear data.
*
* Note 2: Fragment lists within fragments are not implemented
@@ -2780,7 +2806,7 @@ EXPORT_SYMBOL(skb_find_text);
/**
* skb_append_datato_frags - append the user data to a skb
* @sk: sock structure
- * @skb: skb structure to be appened with user data.
+ * @skb: skb structure to be appended with user data.
* @getfrag: call back function to be used for getting the user data
* @from: pointer to user message iov
* @length: length of the iov message
@@ -3068,6 +3094,11 @@ perform_csum_check:
}
} while ((offset += len) < head_skb->len);
+ /* Some callers want to get the end of the list.
+ * Put it in segs->prev to avoid walking the list.
+ * (see validate_xmit_skb_list() for example)
+ */
+ segs->prev = tail;
return segs;
err:
@@ -3151,6 +3182,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
+ /* switch back to head shinfo */
+ pinfo = skb_shinfo(p);
+
if (pinfo->frag_list)
goto merge;
if (skb_gro_len(p) != pinfo->gso_size)
@@ -3178,7 +3212,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb_shinfo(nskb)->frag_list = p;
skb_shinfo(nskb)->gso_size = pinfo->gso_size;
pinfo->gso_size = 0;
- skb_header_release(p);
+ __skb_header_release(p);
NAPI_GRO_CB(nskb)->last = p;
nskb->data_len += p->len;
@@ -3210,7 +3244,7 @@ merge:
else
NAPI_GRO_CB(p)->last->next = skb;
NAPI_GRO_CB(p)->last = skb;
- skb_header_release(skb);
+ __skb_header_release(skb);
lp = p;
done:
@@ -3226,7 +3260,6 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
-EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
@@ -3236,8 +3269,7 @@ void __init skb_init(void)
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
- (2*sizeof(struct sk_buff)) +
- sizeof(atomic_t),
+ sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
@@ -3490,32 +3522,66 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_queue_err_skb);
-void __skb_tstamp_tx(struct sk_buff *orig_skb,
- struct skb_shared_hwtstamps *hwtstamps,
- struct sock *sk, int tstype)
+struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
- struct sock_exterr_skb *serr;
- struct sk_buff *skb;
- int err;
+ struct sk_buff_head *q = &sk->sk_error_queue;
+ struct sk_buff *skb, *skb_next;
+ int err = 0;
- if (!sk)
- return;
+ spin_lock_bh(&q->lock);
+ skb = __skb_dequeue(q);
+ if (skb && (skb_next = skb_peek(q)))
+ err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
+ spin_unlock_bh(&q->lock);
- if (hwtstamps) {
- *skb_hwtstamps(orig_skb) =
- *hwtstamps;
- } else {
- /*
- * no hardware time stamps available,
- * so keep the shared tx_flags and only
- * store software time stamp
- */
- orig_skb->tstamp = ktime_get_real();
+ sk->sk_err = err;
+ if (err)
+ sk->sk_error_report(sk);
+
+ return skb;
+}
+EXPORT_SYMBOL(sock_dequeue_err_skb);
+
+/**
+ * skb_clone_sk - create clone of skb, and take reference to socket
+ * @skb: the skb to clone
+ *
+ * This function creates a clone of a buffer that holds a reference on
+ * sk_refcnt. Buffers created via this function are meant to be
+ * returned using sock_queue_err_skb, or free via kfree_skb.
+ *
+ * When passing buffers allocated with this function to sock_queue_err_skb
+ * it is necessary to wrap the call with sock_hold/sock_put in order to
+ * prevent the socket from being released prior to being enqueued on
+ * the sk_error_queue.
+ */
+struct sk_buff *skb_clone_sk(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct sk_buff *clone;
+
+ if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+ return NULL;
+
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (!clone) {
+ sock_put(sk);
+ return NULL;
}
- skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!skb)
- return;
+ clone->sk = sk;
+ clone->destructor = sock_efree;
+
+ return clone;
+}
+EXPORT_SYMBOL(skb_clone_sk);
+
+static void __skb_complete_tx_timestamp(struct sk_buff *skb,
+ struct sock *sk,
+ int tstype)
+{
+ struct sock_exterr_skb *serr;
+ int err;
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
@@ -3533,6 +3599,42 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (err)
kfree_skb(skb);
}
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct sock *sk = skb->sk;
+
+ /* take a reference to prevent skb_orphan() from freeing the socket */
+ sock_hold(sk);
+
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+
+ sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps,
+ struct sock *sk, int tstype)
+{
+ struct sk_buff *skb;
+
+ if (!sk)
+ return;
+
+ if (hwtstamps)
+ *skb_hwtstamps(orig_skb) = *hwtstamps;
+ else
+ orig_skb->tstamp = ktime_get_real();
+
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ __skb_complete_tx_timestamp(skb, sk, tstype);
+}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
void skb_tstamp_tx(struct sk_buff *orig_skb,
@@ -3557,9 +3659,14 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+ /* take a reference to prevent skb_orphan() from freeing the socket */
+ sock_hold(sk);
+
err = sock_queue_err_skb(sk, skb);
if (err)
kfree_skb(skb);
+
+ sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
@@ -3860,7 +3967,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;
if (len <= skb_tailroom(to)) {
- BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+ if (len)
+ BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
return true;
}
@@ -3962,14 +4070,151 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ unsigned int thlen = 0;
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- return tcp_hdrlen(skb) + shinfo->gso_size;
+ if (skb->encapsulation) {
+ thlen = skb_inner_transport_header(skb) -
+ skb_transport_header(skb);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ thlen += inner_tcp_hdrlen(skb);
+ } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ thlen = tcp_hdrlen(skb);
+ }
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
* accounted for.
*/
- return shinfo->gso_size;
+ return thlen + shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+{
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ skb->mac_header += VLAN_HLEN;
+ return skb;
+}
+
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
+{
+ struct vlan_hdr *vhdr;
+ u16 vlan_tci;
+
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ /* vlan_tci is already set-up so leave this for another time */
+ return skb;
+ }
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ goto err_free;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ goto err_free;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ vlan_tci = ntohs(vhdr->h_vlan_TCI);
+ __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
+
+ skb_pull_rcsum(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vhdr);
+
+ skb = skb_reorder_vlan_header(skb);
+ if (unlikely(!skb))
+ goto err_free;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_reset_mac_len(skb);
+
+ return skb;
+
+err_free:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(skb_vlan_untag);
+
+/**
+ * alloc_skb_with_frags - allocate skb with page frags
+ *
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
+ *
+ * This can be used to allocate a paged skb, given a maximal order for frags.
+ */
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask)
+{
+ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ unsigned long chunk;
+ struct sk_buff *skb;
+ struct page *page;
+ gfp_t gfp_head;
+ int i;
+
+ *errcode = -EMSGSIZE;
+ /* Note this test could be relaxed, if we succeed to allocate
+ * high order pages...
+ */
+ if (npages > MAX_SKB_FRAGS)
+ return NULL;
+
+ gfp_head = gfp_mask;
+ if (gfp_head & __GFP_WAIT)
+ gfp_head |= __GFP_REPEAT;
+
+ *errcode = -ENOBUFS;
+ skb = alloc_skb(header_len, gfp_head);
+ if (!skb)
+ return NULL;
+
+ skb->truesize += npages << PAGE_SHIFT;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(gfp_mask |
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
+ order);
+ if (page)
+ goto fill_page;
+ /* Do not retry other high order allocations */
+ order = 1;
+ max_page_order = 0;
+ }
+ order--;
+ }
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
+ }
+ return skb;
+
+failure:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(alloc_skb_with_frags);