From 4262e5ccbbb5171abd2921eed16ed339633d6478 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:16 +0100 Subject: net: dev: move inline skb_needs_linearize helper to header As we need it elsewhere, move the inline helper function skb_needs_linearize() over to the skbuff.h include file. While at it, also convert the return to 'bool' instead of 'int' and add a proper kernel doc. Signed-off-by: Daniel Borkmann Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 215b5ea1cb3..77c7aae1c6b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2392,6 +2392,24 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, return buffer; } +/** + * skb_needs_linearize - check if we need to linearize a given skb + * depending on the given device features. + * @skb: socket buffer to check + * @features: net device features + * + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG. + */ +static inline bool skb_needs_linearize(struct sk_buff *skb, + netdev_features_t features) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); +} + static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) -- cgit v1.2.3-70-g09d2
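With the helper now shared via skbuff.h, a driver transmit path can use it directly. The following is a minimal sketch, not taken from the patch: the device, its stats handling and the function name are illustrative, while skb_needs_linearize() and skb_linearize() are the kernel APIs being demonstrated.

        /* Hypothetical ndo_start_xmit() fragment: flatten the skb when the
         * device supports neither SG nor FRAGLIST for this packet's layout.
         * skb_linearize() returns 0 on success and -ENOMEM on failure.
         */
        static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
        {
                if (skb_needs_linearize(skb, dev->features) && skb_linearize(skb)) {
                        dev->stats.tx_dropped++;
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;    /* packet dropped but consumed */
                }
                /* ... the data is now linear; DMA-map skb->data and transmit ... */
                return NETDEV_TX_OK;
        }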
From 3958afa1b272eb07109fd31549e69193b4d7c364 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:06 -0800 Subject: net: Change skb_get_rxhash to skb_get_hash Change the name of the function as part of making the hash in the skbuff a generic property, rather than one tied only to the receive path. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- drivers/net/tun.c | 4 ++-- drivers/net/vxlan.c | 2 +- include/linux/skbuff.h | 6 +++--- net/core/dev.c | 4 ++-- net/core/flow_dissector.c | 6 +++--- net/packet/af_packet.c | 4 ++-- net/sched/cls_flow.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_fq.c | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 64409af0da3..431f58cb2ce 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -224,7 +224,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, goto out; /* Check if we can use flow to select a queue */ - rxq = skb_get_rxhash(skb); + rxq = skb_get_hash(skb); if (rxq) { tap = rcu_dereference(vlan->taps[rxq % numvtaps]); goto out; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c5a8d8cde5..8569da24833 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -358,7 +358,7 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); numqueues = ACCESS_ONCE(tun->numqueues); - txq = skb_get_rxhash(skb); + txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); if (e) @@ -1146,7 +1146,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); - rxhash = skb_get_rxhash(skb); + rxhash = skb_get_hash(skb); netif_rx_ni(skb); tun->dev->stats.rx_packets++; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 58f6a0c02b1..b247a7a2936 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1405,7 +1405,7 @@ __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb) unsigned int range = (port_max - port_min) + 1; u32 hash; - hash = skb_get_rxhash(skb); + hash = skb_get_hash(skb); if (!hash) hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77c7aae1c6b..4725b953e00 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -703,11 +703,11 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); -void __skb_get_rxhash(struct sk_buff *skb); -static inline __u32 skb_get_rxhash(struct sk_buff *skb) +void __skb_get_hash(struct sk_buff *skb); +static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_rxhash) - __skb_get_rxhash(skb); + __skb_get_hash(skb); return skb->rxhash; } diff --git a/net/core/dev.c b/net/core/dev.c index 9d4369ece67..c482fe8abf8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3006,7 +3006,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_rxhash(skb)) + if (!skb_get_hash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3151,7 +3151,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6ef1732250..b324bfa3485 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a) } /* - * __skb_get_rxhash:
calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; u32 hash; @@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb) skb->rxhash = hash; } -EXPORT_SYMBOL(__skb_get_rxhash); +EXPORT_SYMBOL(__skb_get_hash); /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cc803c63059..24675f06f4f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -963,7 +963,7 @@ static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); + ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, @@ -1295,7 +1295,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_rxhash(skb); + skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 7881e2fccbc..83a6322b775 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -220,7 +220,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) static u32 flow_get_rxhash(struct sk_buff *skb) { - return skb_get_rxhash(skb); + return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 382519a5d7f..9b8c0b0e60d 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen) META_COLLECTOR(int_rxhash) { - dst->value = skb_get_rxhash(skb); + dst->value = skb_get_hash(skb); } /************************************************************************** diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f2fb92dd970..08ef7a42c0e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -226,7 +226,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_rxhash(skb) | 1L); + sk = (struct sock *)(skb_get_hash(skb) | 1L); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; -- cgit v1.2.3-70-g09d2 From 7539fadcb8146a5f0db51e80d99c9e724efec7b0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:18 -0800 Subject: net: Add utility functions to clear rxhash In several places 'skb->rxhash = 0' is being done to clear the rxhash value in an skb. This does not clear l4_rxhash, which could still be set, so the rxhash would not be recalculated on a subsequent call to skb_get_hash. This patch adds an explicit function to clear all the rxhash related information in the skb properly. skb_clear_hash_if_not_l4 clears the rxhash only if it is not marked as l4_rxhash. Fixed up places where 'skb->rxhash = 0' was being done. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 12 ++++++++++++ include/net/dst.h | 5 ++--- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_tunnel_core.c | 5 ++--- net/openvswitch/actions.c | 10 +++++----- 5 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4725b953e00..7deb7ad6591 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -712,6 +712,18 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->rxhash; } +static inline void skb_clear_hash(struct sk_buff *skb) +{ + skb->rxhash = 0; + skb->l4_rxhash = 0; +} + +static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) +{ + if (!skb->l4_rxhash) + skb_clear_hash(skb); +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/include/net/dst.h b/include/net/dst.h index 44995c13e94..77eb53fabfb 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -322,12 +322,11 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; /* - * Clear rxhash so that we can recalulate the hash for the + * Clear hash so that we can recalulate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. */ - if (!skb->l4_rxhash) - skb->rxhash = 0; + skb_clear_hash_if_not_l4(skb); skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, !net_eq(net, dev_net(dev))); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 2481993a497..c10a3ce5cbf 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -704,7 +704,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) return NULL; - skb->rxhash = 0; + skb_clear_hash(skb); } } return skb; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 42ffbc8d65c..6156f4ef5e9 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -56,7 +56,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb->rxhash = 0; + skb_clear_hash(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -107,8 +107,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); - if (!skb->l4_rxhash) - skb->rxhash = 0; + skb_clear_hash_if_not_l4(skb); skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 65cfaa81607..716b7eebfe7 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -165,7 +165,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, } csum_replace4(&nh->check, *addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); *addr = new_addr; } @@ -199,7 +199,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, if (recalculate_csum) update_ipv6_checksum(skb, l4_proto, addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); memcpy(addr, new_addr, sizeof(__be32[4])); } @@ -296,7 +296,7 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port, { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) @@ -310,7 +310,7 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) uh->check = CSUM_MANGLED_0; } 
else { *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } } @@ -381,7 +381,7 @@ static int set_sctp(struct sk_buff *skb, /* Carry any checksum errors through. */ sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - skb->rxhash = 0; + skb_clear_hash(skb); } return 0; -- cgit v1.2.3-70-g09d2 From 09323cc479316e046931a2c679932204b36fea6c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:16:19 -0800 Subject: net: Add function to set the rxhash The function skb_set_hash was added for drivers to call to set the rxhash in an skb. The type of hash is also specified as a parameter (L2, L3, L4, or unknown type). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7deb7ad6591..99846956dff 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -703,6 +703,46 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +/* + * Packet hash types specify the type of hash in skb_set_hash. + * + * Hash types refer to the protocol layer addresses which are used to + * construct a packet's hash. The hashes are used to differentiate or identify + * flows of the protocol layer for the hash type. Hash types are either + * layer-2 (L2), layer-3 (L3), or layer-4 (L4). + * + * Properties of hashes: + * + * 1) Two packets in different flows have different hash values + * 2) Two packets in the same flow should have the same hash value + * + * A hash at a higher layer is considered to be more specific. A driver should + * set the most specific hash possible. + * + * A driver cannot indicate a more specific hash than the layer at which a hash + * was computed. For instance an L3 hash cannot be set as an L4 hash. + * + * A driver may indicate a hash level which is less specific than the + * actual layer the hash was computed on. For instance, a hash computed + * at L4 may be considered an L3 hash. This should only be done if the + * driver can't unambiguously determine that the HW computed the hash at + * the higher layer. Note that the "should" in the second property above + * permits this. + */ +enum pkt_hash_types { + PKT_HASH_TYPE_NONE, /* Undefined type */ + PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ + PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ + PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ +}; + +static inline void +skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) +{ + skb->l4_rxhash = (type == PKT_HASH_TYPE_L4); + skb->rxhash = hash; +} + void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { -- cgit v1.2.3-70-g09d2
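With skb_set_hash() in place, a receive path can record a hardware RSS hash together with its type. A hedged sketch follows; the rx_desc layout is invented for illustration, while skb_set_hash(), NETIF_F_RXHASH, le32_to_cpu() and the PKT_HASH_TYPE_* values are the real interfaces.

        /* Hypothetical RX completion fragment: report the hardware hash with
         * the most specific type the hardware can actually guarantee.
         */
        if (netdev->features & NETIF_F_RXHASH)
                skb_set_hash(skb, le32_to_cpu(rx_desc->rss_hash),
                             rx_desc->l4_hash_valid ? PKT_HASH_TYPE_L4 :
                                                      PKT_HASH_TYPE_L3);

Note that at this point in the series only an L4 hash sets l4_rxhash, so skb_get_hash() will still fall back to a software hash computation for anything reported as less specific.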
From 3df7a74e797aa2d8be9b7c649cfd56a8517dcf6e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:16:29 -0800 Subject: net: Add utility function to copy skb hash Adds skb_copy_hash to copy rxhash and l4_rxhash from one skb to another. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ net/core/skbuff.c | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 99846956dff..06bedeb0d49 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -764,6 +764,12 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) skb_clear_hash(skb); } +static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) +{ + to->rxhash = from->rxhash; + to->l4_rxhash = from->l4_rxhash; +}; + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 06e72d3cdf6..2b6b863f51f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -712,9 +712,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); - new->rxhash = old->rxhash; + skb_copy_hash(new, old); new->ooo_okay = old->ooo_okay; - new->l4_rxhash = old->l4_rxhash; new->no_fcs = old->no_fcs; new->encapsulation = old->encapsulation; #ifdef CONFIG_XFRM -- cgit v1.2.3-70-g09d2 From 78ea85f17b15390e30d8b47488ec7b6cf0790663 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 16 Dec 2013 23:27:09 +0100 Subject: net: skbuff: improve comment on checksumming It can be a bit confusing when looking for the checksumming flags that the actual comment for them resides elsewhere, further down in the header file. Thus, bring the documentation to where we define these flags, and slightly improve the doc text to make it a bit more clear/readable. Also, whitespace-align the values of the defines while at it. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 131 ++++++++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 56 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 06bedeb0d49..c5cd016f512 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,11 +34,82 @@ #include #include +/* A. Checksumming of received packets by device. + * + * CHECKSUM_NONE: + * + * Device failed to checksum this packet e.g. due to lack of capabilities. + * The packet contains full (though not verified) checksum in packet but + * not in skb->csum. Thus, skb->csum is undefined in this case. + * + * CHECKSUM_UNNECESSARY: + * + * The hardware you're dealing with doesn't calculate the full checksum + * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums + * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will + * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still + * undefined in this case though. It is a bad option, but, unfortunately, + * nowadays most vendors do this. Apparently with the secret goal to sell + * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_COMPLETE: + * + * This is the most generic way. The device supplied checksum of the _whole_ + * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * hardware doesn't need to parse L3/L4 headers to implement this. + * + * Note: Even if device supports only some protocols, but is able to produce + * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
+ * + * CHECKSUM_PARTIAL: + * + * This is identical to the case for output below. This may occur on a packet + * received directly from another Linux OS, e.g., a virtualized Linux kernel + * on the same host. The packet can be treated in the same way as + * CHECKSUM_UNNECESSARY, except that on output (i.e., forwarding) the + * checksum must be filled in by the OS or the hardware. + * + * B. Checksumming on output. + * + * CHECKSUM_NONE: + * + * The skb was already checksummed by the protocol, or a checksum is not + * required. + * + * CHECKSUM_PARTIAL: + * + * The device is required to checksum the packet as seen by hard_start_xmit() + * from skb->csum_start up to the end, and to record/write the checksum at + * offset skb->csum_start + skb->csum_offset. + * + * The device must show its capabilities in dev->features, set up at device + * setup time, e.g. netdev_features.h: + * + * NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything. + * NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over + * IPv4. Sigh. Vendors like this way for an unknown reason. + * Though, see comment above about CHECKSUM_UNNECESSARY. 8) + * NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead. + * NETIF_F_... - Well, you get the picture. + * + * CHECKSUM_UNNECESSARY: + * + * Normally, the device will do per protocol specific checksumming. Protocol + * implementations that do not want the NIC to perform the checksum + * calculation should use this flag in their outgoing skbs. + * + * NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC + * offload. Correspondingly, the FCoE protocol driver + * stack should use CHECKSUM_UNNECESSARY. + * + * Any questions? No questions, good. --ANK + */ + /* Don't change this without changing skb_csum_unnecessary! */ -#define CHECKSUM_NONE 0 -#define CHECKSUM_UNNECESSARY 1 -#define CHECKSUM_COMPLETE 2 -#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_NONE 0 +#define CHECKSUM_UNNECESSARY 1 +#define CHECKSUM_COMPLETE 2 +#define CHECKSUM_PARTIAL 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -54,58 +125,6 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * COMPLETE: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use COMPLETE, - * not UNNECESSARY. - * - * PARTIAL: identical to the case for output below. This may occur - * on a packet received directly from another Linux OS, e.g., - * a virtualised Linux kernel on the same host. The packet can - * be treated in the same way as UNNECESSARY except that on - * output (i.e., forwarding) the checksum must be filled in - * by the OS or the hardware. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. 
- * - * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->csum_start to the end and to record the checksum - * at skb->csum_start + skb->csum_offset. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. - * - * UNNECESSARY: device will do per protocol specific csum. Protocol drivers - * that do not want net to perform the checksum calculation should use - * this flag in their outgoing skbs. - * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC - * offload. Correspondingly, the FCoE protocol driver - * stack should use CHECKSUM_UNNECESSARY. - * - * Any questions? No questions, good. --ANK - */ - struct net_device; struct scatterlist; struct pipe_inode_info; -- cgit v1.2.3-70-g09d2 From af2806f8f90a150160be898cd85332459c83c5cb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:17 +0100 Subject: net: Export skb_zerocopy() to zerocopy from one skb to another Make the skb zerocopy logic written for nfnetlink queue available for use by other modules. Signed-off-by: Thomas Graf Reviewed-by: Daniel Borkmann Acked-by: David S. Miller Signed-off-by: Jesse Gross --- include/linux/skbuff.h | 3 ++ net/core/skbuff.c | 85 ++++++++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue_core.c | 59 ++----------------------- 3 files changed, 92 insertions(+), 55 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bec1cc7d5e3..7c48e2d4c72 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2345,6 +2345,9 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8..55859cb8b83 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2122,6 +2122,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + * @from: source buffer + * + * Calculates the amount of linear headroom needed in the 'to' skb passed + * into skb_zerocopy(). 
+ */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ + unsigned int hlen = 0; + + if (!from->head_frag || + skb_headlen(from) < L1_CACHE_BYTES || + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(from); + + if (skb_has_frag_list(from)) + hlen = from->len; + + return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + * skb_zerocopy - Zero copy skb to skb + * @to: destination buffer + * @source: source buffer + * @len: number of bytes to copy from source buffer + * @hlen: size of linear headroom in destination buffer + * + * Copies up to `len` bytes from `from` to `to` by creating references + * to the frags in the source buffer. + * + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the + * headroom in the `to` buffer. + */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + BUG_ON(!from->head_frag && !hlen); + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} +EXPORT_SYMBOL_GPL(skb_zerocopy); + void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 21258cf7009..615ee12647a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -235,51 +235,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ - int i, j = 0; - int plen = 0; /* length of skb->head fragment */ - struct page *page; - unsigned int offset; - - /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } - - if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); - len -= hlen; - } else { - plen = min_t(int, skb_headlen(from), len); - if (plen) { - page = virt_to_head_page(from->head); - offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); - get_page(page); - j = 1; - len -= plen; - } - } - - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; - - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { - if (!len) - break; - skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; - skb_frag_ref(to, j); - j++; - } - skb_shinfo(to)->nr_frags = j; -} - static int nfqnl_put_packet_info(struct 
sk_buff *nlskb, struct sk_buff *packet, bool csum_verify) @@ -304,7 +259,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, { size_t size; size_t data_len = 0, cap_len = 0; - int hlen = 0; + unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -356,14 +311,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || - skb_headlen(entskb) < L1_CACHE_BYTES || - skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) - hlen = skb_headlen(entskb); - - if (skb_has_frag_list(entskb)) - hlen = entskb->len; - hlen = min_t(int, data_len, hlen); + hlen = skb_zerocopy_headlen(entskb); + hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; @@ -504,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - nfqnl_zcopy(skb, entskb, data_len, hlen); + skb_zerocopy(skb, entskb, data_len, hlen); } nlh->nlmsg_len = skb->len; -- cgit v1.2.3-70-g09d2 From fd44b93cb5eee218231f6ce5883df937b3b9c3eb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jan 2014 23:23:44 +0100 Subject: net: skbuff: const-ify casts in skb_queue_* functions We should const-ify comparisons on skb_queue_* inline helper functions as their parameters are const as well, so let's not drop that. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 956e11a168d..d97f2d07d02 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -827,7 +827,7 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) */ static inline int skb_queue_empty(const struct sk_buff_head *list) { - return list->next == (struct sk_buff *)list; + return list->next == (const struct sk_buff *) list; } /** @@ -840,7 +840,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)list; + return skb->next == (const struct sk_buff *) list; } /** @@ -853,7 +853,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->prev == (struct sk_buff *)list; + return skb->prev == (const struct sk_buff *) list; } /** -- cgit v1.2.3-70-g09d2 From ed1f50c3a7c1ad1b1b4d584308eab77d57a330f8 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 9 Jan 2014 10:02:46 +0000 Subject: net: add skb_checksum_setup This patch adds a function to the core network code that sets up the partial checksum offset for IP packets (and optionally re-calculates the pseudo-header checksum). The implementation was previously private and duplicated between xen-netback and xen-netfront; however, it is not xen-specific and is potentially useful to any network driver. Signed-off-by: Paul Durrant Cc: David Miller Cc: Eric Dumazet Cc: Veaceslav Falico Cc: Alexander Duyck Cc: Nicolas Dichtel Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 + net/core/skbuff.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 275 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d97f2d07d02..48b760505cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2893,6 +2893,8 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +int skb_checksum_setup(struct sk_buff *skb, bool recalculate); + u32 __skb_get_poff(const struct sk_buff *skb); /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1d641e781f8..15057d29b01 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -3549,6 +3550,278 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) } EXPORT_SYMBOL_GPL(skb_partial_csum_set); +static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, + unsigned int max) +{ + if (skb_headlen(skb) >= len) + return 0; + + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. + */ + if (max > skb->len) + max = skb->len; + + if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) + return -ENOMEM; + + if (skb_headlen(skb) < len) + return -EPROTO; + + return 0; +} + +/* This value should be large enough to cover a tagged ethernet header plus + * maximally sized IP and TCP or UDP headers. + */ +#define MAX_IP_HDR_LEN 128 + +static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) +{ + unsigned int off; + bool fragment; + int err; + + fragment = false; + + err = skb_maybe_pull_tail(skb, + sizeof(struct iphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + fragment = true; + + off = ip_hdrlen(skb); + + err = -EPROTO; + + if (fragment) + goto out; + + switch (ip_hdr(skb)->protocol) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + + err = 0; + +out: + return err; +} + +/* This value should be large enough to cover a tagged ethernet header plus + * an IPv6 header, all options, and a maximal TCP or UDP header. 
+ */ +#define MAX_IPV6_HDR_LEN 256 + +#define OPT_HDR(type, skb, off) \ + (type *)(skb_network_header(skb) + (off)) + +static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) +{ + int err; + u8 nexthdr; + unsigned int off; + unsigned int len; + bool fragment; + bool done; + + fragment = false; + done = false; + + off = sizeof(struct ipv6hdr); + + err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + nexthdr = ipv6_hdr(skb)->nexthdr; + + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + while (off <= len && !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct ipv6_opt_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct ip_auth_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct ip_auth_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_authlen(hp); + break; + } + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct frag_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct frag_hdr, skb, off); + + if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) + fragment = true; + + nexthdr = hp->nexthdr; + off += sizeof(struct frag_hdr); + break; + } + default: + done = true; + break; + } + } + + err = -EPROTO; + + if (!done || fragment) + goto out; + + switch (nexthdr) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + + err = 0; + +out: + return err; +} + +/** + * skb_checksum_setup - set up partial checksum offset + * @skb: the skb to set up + * @recalculate: if true the pseudo-header checksum will be recalculated + */ +int skb_checksum_setup(struct sk_buff *skb, bool recalculate) +{ + int err; + + switch (skb->protocol) { + case htons(ETH_P_IP): + err = skb_checksum_setup_ip(skb, recalculate); + break; + + case htons(ETH_P_IPV6): + err = skb_checksum_setup_ipv6(skb, recalculate); + break; + + default: + err = -EPROTO; + break; + } + + return err; +} +EXPORT_SYMBOL(skb_checksum_setup); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", -- cgit v1.2.3-70-g09d2 From 57bdf7f42be05640f8080b06844c94367ad1884b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 15 Jan 2014 08:57:54 -0800 Subject: net: Add skb_get_hash_raw Function to just return skb->rxhash without checking to see if it needs to be recomputed. 
Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 48b760505cb..1f689e62e4c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -771,6 +771,11 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->rxhash; } +static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) +{ + return skb->rxhash; +} + static inline void skb_clear_hash(struct sk_buff *skb) { skb->rxhash = 0; -- cgit v1.2.3-70-g09d2 From de960aa9ab4decc3304959f69533eef64d05d8e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 26 Jan 2014 10:58:16 +0100 Subject: net: add and use skb_gso_transport_seglen() This moves part of Eric Dumazet's skb_gso_seglen helper from the tbf scheduler to the skbuff core so it may be reused by an upcoming IP forwarding path patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 25 +++++++++++++++++++++++++ net/sched/sch_tbf.c | 13 +++---------- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f689e62e4c..f589c9af8cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2456,6 +2456,7 @@ void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct skb_checksum_ops { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8f519dbb358..9ae6d11374d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #ifdef CONFIG_NET_CLS_ACT #include @@ -3916,3 +3918,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset_trace(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. + */ +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len; + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len = tcp_hdrlen(skb); + else + hdr_len = sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} +EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index fbba5b0ec12..1cb413fead8 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Simple Token Bucket Filter.
@@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) */ -static unsigned int skb_gso_seglen(const struct sk_buff *skb) +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) { unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - const struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); - return hdr_len + shinfo->gso_size; + return hdr_len + skb_gso_transport_seglen(skb); } /* GSO packet is too big, segment it so that tbf can transmit @@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) + if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } -- cgit v1.2.3-70-g09d2
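To make the rewritten receive-side checksum documentation (78ea85f17b15) concrete, here is a hedged sketch of a driver choosing among the flags; the rx_desc fields are hypothetical and the way real hardware reports a raw checksum varies, but the CHECKSUM_* values and csum_unfold() are the actual kernel interfaces.

        /* Hypothetical RX descriptor handling following convention A above. */
        if (rx_desc->l4_csum_verified)
                skb->ip_summed = CHECKSUM_UNNECESSARY;  /* HW validated TCP/UDP csum */
        else if (rx_desc->raw_csum_valid) {
                skb->ip_summed = CHECKSUM_COMPLETE;     /* HW summed the whole packet */
                skb->csum = csum_unfold((__force __sum16)rx_desc->raw_csum);
        } else
                skb->ip_summed = CHECKSUM_NONE;         /* no help from hardware */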
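The skb_zerocopy() pair exported in af2806f8f90a can be used outside nfnetlink_queue along the same pattern. A minimal sketch under the assumption that the caller wants up to 'len' bytes of 'from's payload; the function name and allocation policy are illustrative.

        /* Build a new skb that shares (rather than copies) the payload pages
         * of 'from': size the linear part with skb_zerocopy_headlen(), then
         * let skb_zerocopy() take page references for the rest.
         */
        static struct sk_buff *foo_zerocopy_clone(struct sk_buff *from, int len)
        {
                unsigned int hlen = min_t(unsigned int,
                                          skb_zerocopy_headlen(from), len);
                struct sk_buff *to = alloc_skb(hlen, GFP_ATOMIC);

                if (!to)
                        return NULL;
                skb_zerocopy(to, from, len, hlen);      /* returns void here */
                return to;
        }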
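A sketch of how a driver like xen-netfront can now call the common helper from ed1f50c3a7c1 instead of carrying a private copy; the surrounding error handling is illustrative.

        /* Hypothetical ingress fixup for a packet handed in by a guest:
         * CHECKSUM_PARTIAL skbs need valid csum_start/csum_offset before
         * delivery, and 'true' asks skb_checksum_setup() to recalculate
         * the pseudo-header checksum as well.
         */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            skb_checksum_setup(skb, true)) {
                kfree_skb(skb);
                return -EPROTO; /* unsupported or malformed protocol headers */
        }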
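Finally, a hedged sketch of the kind of forwarding-path MTU check that skb_gso_transport_seglen() was exported for; the eventual forwarding patch may differ, and only the helper itself plus the standard header accessors are assumed.

        /* Would each segment of this GSO packet, network headers included,
         * exceed the egress MTU? Mirrors the skb_gso_mac_seglen() pattern
         * in sch_tbf above, but measured from the network header.
         */
        static bool foo_gso_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        {
                unsigned int hdr_len = skb_transport_header(skb) -
                                       skb_network_header(skb);

                return hdr_len + skb_gso_transport_seglen(skb) > mtu;
        }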