From b78462ebc6a4ef9074aa80abebcdd470dc5f0ce0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Jun 2010 12:24:37 +0000 Subject: skbuff: add check for non-linear to warn_if_lro and needs_linearize We can avoid an unecessary cache miss by checking if the skb is non-linear before accessing gso_size/gso_type in skb_warn_if_lro, the same can also be done to avoid a cache miss on nr_frags if data_len is 0. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf243fc5495..645e78d395f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2129,7 +2129,8 @@ static inline bool skb_warn_if_lro(const struct sk_buff *skb) /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. */ struct skb_shared_info *shinfo = skb_shinfo(skb); - if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) { + if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 && + unlikely(shinfo->gso_type == 0)) { __skb_warn_lro_forwarding(skb); return true; } -- cgit v1.2.3-70-g09d2 From 5933dd2f028cdcbb4b3169dca594324704ba10ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Jun 2010 18:16:43 -0700 Subject: net: NET_SKB_PAD should depend on L1_CACHE_BYTES In old kernels, NET_SKB_PAD was defined to 16. Then commit d6301d3dd1c2 (net: Increase default NET_SKB_PAD to 32), and commit 18e8c134f4e9 (net: Increase NET_SKB_PAD to 64 bytes) increased it to 64. While first patch was governed by network stack needs, second was more driven by performance issues on current hardware. Real intent was to align data on a cache line boundary. So use max(32, L1_CACHE_BYTES) instead of 64, to be more generic. Remove microblaze and powerpc own NET_SKB_PAD definitions. Thanks to Alexander Duyck and David Miller for their comments. Suggested-by: David Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/microblaze/include/asm/system.h | 3 --- arch/powerpc/include/asm/system.h | 3 --- include/linux/skbuff.h | 8 +++++--- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/arch/microblaze/include/asm/system.h b/arch/microblaze/include/asm/system.h index 48c4f0335e3..81e1f7d5b4c 100644 --- a/arch/microblaze/include/asm/system.h +++ b/arch/microblaze/include/asm/system.h @@ -101,10 +101,7 @@ extern struct dentry *of_debugfs_root; * MicroBlaze doesn't handle unaligned accesses in hardware. * * Based on this we force the IP header alignment in network drivers. - * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining - * cacheline alignment of buffers. */ #define NET_IP_ALIGN 2 -#define NET_SKB_PAD L1_CACHE_BYTES #endif /* _ASM_MICROBLAZE_SYSTEM_H */ diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index a6297c67c3d..6c294acac84 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -515,11 +515,8 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. - * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining - * cacheline alignment of buffers. */ #define NET_IP_ALIGN 0 -#define NET_SKB_PAD L1_CACHE_BYTES #define cmpxchg64(ptr, o, n) \ ({ \ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 122d08396e5..ac74ee085d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1414,12 +1414,14 @@ static inline int skb_network_offset(const struct sk_buff *skb) * * Various parts of the networking layer expect at least 32 bytes of * headroom, you should not reduce this. - * With RPS, we raised NET_SKB_PAD to 64 so that get_rps_cpus() fetches span - * a 64 bytes aligned block to fit modern (>= 64 bytes) cache line sizes + * + * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) + * to reduce average number of cache lines per packet. + * get_rps_cpus() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD -#define NET_SKB_PAD 64 +#define NET_SKB_PAD max(32, L1_CACHE_BYTES) #endif extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); -- cgit v1.2.3-70-g09d2 From 4507a71507d4ff37e9a499c4241b7701ed1feab4 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sat, 17 Jul 2010 08:48:28 +0000 Subject: net: add driver hook for tx time stamping. This patch adds a hook for transmit time stamps. The transmit hook allows a software fallback for transmit time stamps, for MACs lacking time stamping hardware. Using the hook will still require adding an inline function call to each MAC driver. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac74ee085d7..a1b0400c8d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1947,6 +1947,27 @@ static inline ktime_t net_invalid_timestamp(void) extern void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps); +static inline void sw_tx_timestamp(struct sk_buff *skb) +{ + union skb_shared_tx *shtx = skb_tx(skb); + if (shtx->software && !shtx->in_progress) + skb_tstamp_tx(skb, NULL); +} + +/** + * skb_tx_timestamp() - Driver hook for transmit timestamping + * + * Ethernet MAC Drivers should call this function in their hard_xmit() + * function as soon as possible after giving the sk_buff to the MAC + * hardware, but before freeing the sk_buff. + * + * @skb: A socket buffer. + */ +static inline void skb_tx_timestamp(struct sk_buff *skb) +{ + sw_tx_timestamp(skb); +} + extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); -- cgit v1.2.3-70-g09d2 From c1f19b51d1d87f3e3bb7e6648f43f7d57ed2da6b Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sat, 17 Jul 2010 08:49:36 +0000 Subject: net: support time stamping in phy devices. This patch adds a new networking option to allow hardware time stamps from PHY devices. When enabled, likely candidates among incoming and outgoing network packets are offered to the PHY driver for possible time stamping. When accepted by the PHY driver, incoming packets are deferred for later delivery by the driver. The patch also adds phylib driver methods for the SIOCSHWTSTAMP ioctl and callbacks for transmit and receive time stamping. Drivers may optionally implement these functions. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 5 ++ drivers/net/phy/phy_device.c | 2 + include/linux/netdevice.h | 4 ++ include/linux/phy.h | 22 ++++++++ include/linux/skbuff.h | 31 +++++++++++ net/Kconfig | 10 ++++ net/core/Makefile | 2 +- net/core/dev.c | 3 ++ net/core/timestamping.c | 126 +++++++++++++++++++++++++++++++++++++++++++ net/socket.c | 4 ++ 10 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 net/core/timestamping.c (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index bd88d818f08..5130db8f5c4 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -361,6 +361,11 @@ int phy_mii_ioctl(struct phy_device *phydev, } break; + case SIOCSHWTSTAMP: + if (phydev->drv->hwtstamp) + return phydev->drv->hwtstamp(phydev, ifr); + /* fall through */ + default: return -EOPNOTSUPP; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1a99bb24410..c0761197c07 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -460,6 +460,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, } phydev->attached_dev = dev; + dev->phydev = phydev; phydev->dev_flags = flags; @@ -513,6 +514,7 @@ EXPORT_SYMBOL(phy_attach); */ void phy_detach(struct phy_device *phydev) { + phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; /* If the device had no specific driver before (i.e. - it diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c4fedf00054..fdc3f299223 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -54,6 +54,7 @@ struct vlan_group; struct netpoll_info; +struct phy_device; /* 802.11 specific */ struct wireless_dev; /* source back-compat hooks */ @@ -1065,6 +1066,9 @@ struct net_device { #endif /* n-tuple filter list attached to this device */ struct ethtool_rx_ntuple_list ethtool_ntuple_list; + + /* phy device may attach itself for hardware timestamping */ + struct phy_device *phydev; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/linux/phy.h b/include/linux/phy.h index d63736a8400..6b0a782c622 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -234,6 +234,8 @@ enum phy_state { PHY_RESUMING }; +struct sk_buff; + /* phy_device: An instance of a PHY * * drv: Pointer to the driver for this PHY instance @@ -402,6 +404,26 @@ struct phy_driver { /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); + /* Handles SIOCSHWTSTAMP ioctl for hardware time stamping. */ + int (*hwtstamp)(struct phy_device *phydev, struct ifreq *ifr); + + /* + * Requests a Rx timestamp for 'skb'. If the skb is accepted, + * the phy driver promises to deliver it using netif_rx() as + * soon as a timestamp becomes available. One of the + * PTP_CLASS_ values is passed in 'type'. The function must + * return true if the skb is accepted for delivery. + */ + bool (*rxtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); + + /* + * Requests a Tx timestamp for 'skb'. The phy driver promises + * to deliver it to the socket's error queue as soon as a + * timestamp becomes available. One of the PTP_CLASS_ values + * is passed in 'type'. + */ + void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a1b0400c8d8..f5aa87e1e0c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1933,6 +1933,36 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +extern void skb_timestamping_init(void); + +#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING + +extern void skb_clone_tx_timestamp(struct sk_buff *skb); +extern bool skb_defer_rx_timestamp(struct sk_buff *skb); + +#else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ + +static inline void skb_clone_tx_timestamp(struct sk_buff *skb) +{ +} + +static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) +{ + return false; +} + +#endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */ + +/** + * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps + * + * @skb: clone of the the original outgoing packet + * @hwtstamps: hardware time stamps + * + */ +void skb_complete_tx_timestamp(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps); + /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet @@ -1965,6 +1995,7 @@ static inline void sw_tx_timestamp(struct sk_buff *skb) */ static inline void skb_tx_timestamp(struct sk_buff *skb) { + skb_clone_tx_timestamp(skb); sw_tx_timestamp(skb); } diff --git a/net/Kconfig b/net/Kconfig index 0d68b40fc0e..b3250944cde 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -86,6 +86,16 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. +config NETWORK_PHY_TIMESTAMPING + bool "Timestamping in PHY devices" + depends on EXPERIMENTAL + help + This allows timestamping of network packets by PHYs with + hardware timestamping capabilities. This option adds some + overhead in the transmit and receive paths. + + If you are unsure how to answer this question, answer N. + menuconfig NETFILTER bool "Network packet filtering framework (Netfilter)" ---help--- diff --git a/net/core/Makefile b/net/core/Makefile index 51c3eec850e..8a04dd22cf7 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o - +obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o diff --git a/net/core/dev.c b/net/core/dev.c index e2b9fa2c917..1c002c7ef5d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2957,6 +2957,9 @@ int netif_receive_skb(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + if (skb_defer_rx_timestamp(skb)) + return NET_RX_SUCCESS; + #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; diff --git a/net/core/timestamping.c b/net/core/timestamping.c new file mode 100644 index 00000000000..0ae6c22da85 --- /dev/null +++ b/net/core/timestamping.c @@ -0,0 +1,126 @@ +/* + * PTP 1588 clock support - support for timestamping in PHY devices + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include + +static struct sock_filter ptp_filter[] = { + PTP_FILTER +}; + +static unsigned int classify(struct sk_buff *skb) +{ + if (likely(skb->dev && + skb->dev->phydev && + skb->dev->phydev->drv)) + return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + else + return PTP_CLASS_NONE; +} + +void skb_clone_tx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + struct sk_buff *clone; + struct sock *sk = skb->sk; + unsigned int type; + + if (!sk) + return; + + type = classify(skb); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) + return; + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); + } + break; + default: + break; + } +} + +void skb_complete_tx_timestamp(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct sock *sk = skb->sk; + struct sock_exterr_skb *serr; + int err; + + if (!hwtstamps) + return; + + *skb_hwtstamps(skb) = *hwtstamps; + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + skb->sk = NULL; + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); + +bool skb_defer_rx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + unsigned int type; + + skb_push(skb, ETH_HLEN); + + type = classify(skb); + + skb_pull(skb, ETH_HLEN); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return phydev->drv->rxtstamp(phydev, skb, type); + break; + default: + break; + } + + return false; +} + +void __init skb_timestamping_init(void) +{ + BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); +} diff --git a/net/socket.c b/net/socket.c index 6fe484122a4..2270b941bcc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2394,6 +2394,10 @@ static int __init sock_init(void) netfilter_init(); #endif +#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING + skb_timestamping_init(); +#endif + return 0; } -- cgit v1.2.3-70-g09d2 From fed66381d65a35198639f564365e61a7f256bf79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Jul 2010 19:09:08 +0000 Subject: net: pskb_expand_head() optimization Move frags[] at the end of struct skb_shared_info, and make pskb_expand_head() copy only the used part of it instead of whole array. This should avoid kmemcheck warnings and speedup pskb_expand_head() as well, avoiding a lot of cache misses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- net/core/skbuff.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f5aa87e1e0c..d89876b806a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -202,10 +202,11 @@ struct skb_shared_info { */ atomic_t dataref; - skb_frag_t frags[MAX_SKB_FRAGS]; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ void * destructor_arg; + /* must be last field, see pskb_expand_head() */ + skb_frag_t frags[MAX_SKB_FRAGS]; }; /* We divide dataref into two halves. The higher 16 bits hold references diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 76d33ca5f03..7da58a25ad9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -817,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, memcpy(data + nhead, skb->head, skb->tail - skb->head); #endif memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); + offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); -- cgit v1.2.3-70-g09d2 From cff0d6e6edac7672b3f915bb4fb59f279243b7f9 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 3 Aug 2010 00:31:48 -0700 Subject: can-raw: Fix skb_orphan_try handling Commit fc6055a5ba31e2c14e36e8939f9bf2b6d586a7f5 (net: Introduce skb_orphan_try()) allows an early orphan of the skb and takes care on tx timestamping, which needs the sk-reference in the skb on driver level. So does the can-raw socket, which has not been taken into account here. The patch below adds a 'prevent_sk_orphan' bit in the skb tx shared info, which fixes the problem discovered by Matthias Fuchs here: http://marc.info/?t=128030411900003&r=1&w=2 Even if it's not a primary tx timestamp topic it fits well into some skb shared tx context. Or should be find a different place for the information to protect the sk reference until it reaches the driver level? Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +++- net/can/raw.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d89876b806a..d20d9e7a9bb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -169,6 +169,7 @@ struct skb_shared_hwtstamps { * @software: generate software time stamp * @in_progress: device driver is going to provide * hardware time stamp + * @prevent_sk_orphan: make sk reference available on driver level * @flags: all shared_tx flags * * These flags are attached to packets as part of the @@ -178,7 +179,8 @@ union skb_shared_tx { struct { __u8 hardware:1, software:1, - in_progress:1; + in_progress:1, + prevent_sk_orphan:1; }; __u8 flags; }; diff --git a/net/can/raw.c b/net/can/raw.c index ccfe633eec8..a10e3338f08 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -650,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; + + /* to be able to check the received tx sock reference in raw_rcv() */ + skb_tx(skb)->prevent_sk_orphan = 1; + skb->dev = dev; skb->sk = sk; -- cgit v1.2.3-70-g09d2