From 56079431b6ba163df8ba26b3eccc82379f0c0ce4 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Wed, 29 Mar 2006 15:57:29 -0800 Subject: [NET]: Deinline some larger functions from netdevice.h On an allyesconfig'ured kernel: Size Uses Wasted Name and definition ===== ==== ====== ================================================ 95 162 12075 netif_wake_queue include/linux/netdevice.h 129 86 9265 dev_kfree_skb_any include/linux/netdevice.h 127 56 5885 netif_device_attach include/linux/netdevice.h 73 86 4505 dev_kfree_skb_irq include/linux/netdevice.h 46 60 1534 netif_device_detach include/linux/netdevice.h 119 16 1485 __netif_rx_schedule include/linux/netdevice.h 143 5 492 netif_rx_schedule include/linux/netdevice.h 81 7 366 netif_schedule include/linux/netdevice.h netif_wake_queue is big because __netif_schedule is a big inline: static inline void __netif_schedule(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { unsigned long flags; struct softnet_data *sd; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); dev->next_sched = sd->output_queue; sd->output_queue = dev; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } static inline void netif_wake_queue(struct net_device *dev) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) __netif_schedule(dev); } By de-inlining __netif_schedule we are saving a lot of text at each callsite of netif_wake_queue and netif_schedule. __netif_rx_schedule is also big, and it makes more sense to keep both of them out of line. Patch also deinlines dev_kfree_skb_any. We can deinline dev_kfree_skb_irq instead... oh well. netif_device_attach/detach are not hot paths, we can deinline them too. Signed-off-by: Denis Vlasenko Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 55 +++++------------------------------------------ 1 file changed, 5 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 950dc55e519..40ccf8cc423 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -static inline void __netif_schedule(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - struct softnet_data *sd; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} +extern void __netif_schedule(struct net_device *dev); static inline void netif_schedule(struct net_device *dev) { @@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb) /* Use this variant in places where it could be invoked * either from interrupt or non-interrupt context. 
*/ -static inline void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} +extern void dev_kfree_skb_any(struct sk_buff *skb); #define HAVE_NETIF_RX 1 extern int netif_rx(struct sk_buff *skb); @@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } -static inline void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} +extern void netif_device_detach(struct net_device *dev); -static inline void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} +extern void netif_device_attach(struct net_device *dev); /* * Network interface message level settings @@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev) * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev) -{ - unsigned long flags; - - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} +extern void __netif_rx_schedule(struct net_device *dev); /* Try to reschedule poll. Called by irq handler. */ -- cgit v1.2.3-70-g09d2 From 025be81e83043f20538dcced1e12c5f8d152fbdb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Mar 2006 02:27:06 -0800 Subject: [NET]: Allow skb headroom to be overridden Previously we added NET_IP_ALIGN so an architecture can override the padding done to align headers. The next step is to allow the skb headroom to be overridden. 
We currently always reserve 16 bytes to grow into, meaning all DMAs start 16 bytes into a cacheline. On ppc64 we really want DMA writes to start on a cacheline boundary, so we increase that headroom to one cacheline. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- include/asm-powerpc/system.h | 5 ++++- include/linux/skbuff.h | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 65f5a7b2646..d075725bf44 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. + * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining + * cacheline alignment of buffers. */ -#define NET_IP_ALIGN 0 +#define NET_IP_ALIGN 0 +#define NET_SKB_PAD L1_CACHE_BYTES #endif #define arch_align_stack(x) (x) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 613b9513f8b..c4619a428d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_IP_ALIGN 2 #endif +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. 
+ * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) @@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); + struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); if (likely(skb)) - skb_reserve(skb, 16); + skb_reserve(skb, NET_SKB_PAD); return skb; } #else @@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - + skb_headroom(skb); if (delta < 0) delta = 0; if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); + return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & + ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); return 0; } -- cgit v1.2.3-70-g09d2