From 076bb0c82a44fbe46fe2c8527a5b5b64b69f679d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:17 +0300 Subject: net: rename include/net/ll_poll.h to include/net/busy_poll.h Rename the file and correct all the places where it is included. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/socket.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 6e9ab31e457..8ab48cd8955 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include /* * Is a socket 'connection oriented' ? diff --git a/net/core/sock.c b/net/core/sock.c index ab06b719f5b..9bfe83f4d67 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,7 +139,7 @@ #include #endif -#include +#include static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index afc677eadd9..1a298cb3dae 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include static int one = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 15cbfa94bd8..5423223e93c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,7 @@ #include #include -#include +#include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 35675e46aff..3a261b41a00 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,7 +75,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b270e53c20..bcc0ff2c16d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,7 +109,7 @@ 
#include #include #include -#include +#include #include "udp_impl.h" struct udp_table udp_table __read_mostly; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5cffa5c3e6b..345bd92d4dd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,7 +63,7 @@ #include #include #include -#include +#include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b6f31437a1f..40e72034da0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/socket.c b/net/socket.c index 45afa648364..6a3e9a3f50a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NET_LL_RX_POLL unsigned int sysctl_net_ll_read __read_mostly; -- cgit v1.2.3-70-g09d2 From 8b80cda536ea9bceec0364e897868a30ee13b992 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:26 +0300 Subject: net: rename ll methods to busy-poll Rename ndo_ll_poll to ndo_busy_poll. Rename sk_mark_ll to sk_mark_napi_id. Rename skb_mark_ll to skb_mark_napi_id. Correct all users of these functions. Update comments and defines in include/net/busy_poll.h Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- include/linux/netdevice.h | 2 +- include/net/busy_poll.h | 22 ++++++++++++---------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- 11 files changed, 23 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 05b6b4e8b07..3353efe7919 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -990,7 +990,7 @@ reuse_rx: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - skb_mark_ll(skb, &fp->napi); + skb_mark_napi_id(skb, &fp->napi); if (bnx2x_fp_ll_polling(fp)) netif_receive_skb(skb); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 15a528bda87..e5da07858a2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12027,7 +12027,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = bnx2x_low_latency_recv, + .ndo_busy_poll = bnx2x_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 047ebaaf014..bad8f14b194 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1978,7 +1978,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ - skb_mark_ll(skb, &q_vector->napi); + skb_mark_napi_id(skb, &q_vector->napi); ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ @@ 
-7228,7 +7228,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_poll_controller = ixgbe_netpoll, #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = ixgbe_low_latency_recv, + .ndo_busy_poll = ixgbe_low_latency_recv, #endif #ifdef IXGBE_FCOE .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0fb2438dc2c..5eac871399d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2141,7 +2141,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = mlx4_en_low_latency_recv, + .ndo_busy_poll = mlx4_en_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 90746d37ac9..dec455c8f62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -767,7 +767,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_ll(skb, &cq->napi); + skb_mark_napi_id(skb, &cq->napi); /* Push it up the stack */ netif_receive_skb(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bb82871b849..0741a1e919a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,7 +974,7 @@ struct net_device_ops { void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif #ifdef CONFIG_NET_LL_RX_POLL - int (*ndo_ll_poll)(struct napi_struct *dev); + int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 76f03408774..4ff71908fd4 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -1,5 +1,5 @@ /* - * Low Latency Sockets + * net busy poll support * Copyright(c) 2013 
Intel Corporation. * * This program is free software; you can redistribute it and/or modify it @@ -21,8 +21,8 @@ * e1000-devel Mailing List */ -#ifndef _LINUX_NET_LL_POLL_H -#define _LINUX_NET_LL_POLL_H +#ifndef _LINUX_NET_BUSY_POLL_H +#define _LINUX_NET_BUSY_POLL_H #include #include @@ -110,11 +110,11 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) goto out; ops = napi->dev->netdev_ops; - if (!ops->ndo_ll_poll) + if (!ops->ndo_busy_poll) goto out; do { - rc = ops->ndo_ll_poll(napi); + rc = ops->ndo_busy_poll(napi); if (rc == LL_FLUSH_FAILED) break; /* permanent failure */ @@ -134,13 +134,14 @@ out: } /* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { skb->napi_id = napi->napi_id; } /* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) { sk->sk_napi_id = skb->napi_id; } @@ -166,11 +167,12 @@ static inline bool sk_busy_poll(struct sock *sk, int nonblock) return false; } -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { } -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) { } @@ -180,4 +182,4 @@ static inline bool busy_loop_timeout(unsigned long end_time) } #endif /* CONFIG_NET_LL_RX_POLL */ -#endif /* _LINUX_NET_LL_POLL_H */ +#endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3a261b41a00..b299da5ff49 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1994,7 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_ll(sk, skb); + 
sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bcc0ff2c16d..a0d7151ffbd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1713,7 +1713,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 345bd92d4dd..6e1649d5853 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1499,7 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 40e72034da0..f4058150262 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -844,7 +844,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3-70-g09d2 From 64b0dc517ea1b35d02565a779e6cb77ae9045685 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:36 +0300 Subject: net: rename busy poll socket op and globals Rename LL_SO to BUSY_POLL_SO Rename sysctl_net_ll_{read,poll} to sysctl_busy_{read,poll} Fix up users of these variables. Fix documentation for sysctl. a patch for the socket.7 man page will follow separately, because of limitations of my mail setup. Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- Documentation/sysctl/net.txt | 17 +++++++++-------- arch/alpha/include/uapi/asm/socket.h | 2 +- arch/avr32/include/uapi/asm/socket.h | 2 +- arch/cris/include/uapi/asm/socket.h | 2 +- arch/frv/include/uapi/asm/socket.h | 2 +- arch/h8300/include/uapi/asm/socket.h | 2 +- arch/ia64/include/uapi/asm/socket.h | 2 +- arch/m32r/include/uapi/asm/socket.h | 2 +- arch/mips/include/uapi/asm/socket.h | 2 +- arch/mn10300/include/uapi/asm/socket.h | 2 +- arch/parisc/include/uapi/asm/socket.h | 2 +- arch/powerpc/include/uapi/asm/socket.h | 2 +- arch/s390/include/uapi/asm/socket.h | 2 +- arch/sparc/include/uapi/asm/socket.h | 2 +- arch/xtensa/include/uapi/asm/socket.h | 2 +- include/net/busy_poll.h | 8 ++++---- include/uapi/asm-generic/socket.h | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 8 ++++---- net/socket.c | 4 ++-- 20 files changed, 37 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index d69e14c9002..1c15043aaee 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -50,26 +50,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt, it's a Per-CPU variable. Default: 64 -low_latency_read +busy_read ---------------- Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL) Approximate time in us to busy loop waiting for packets on the device queue. -This sets the default value of the SO_LL socket option. -Can be set or overridden per socket by setting socket option SO_LL, which is -the preferred method of enabling. -If you need to enable the feature globally via sysctl, a value of 50 is recommended. +This sets the default value of the SO_BUSY_POLL socket option. +Can be set or overridden per socket by setting socket option SO_BUSY_POLL, +which is the preferred method of enabling. If you need to enable the feature +globally via sysctl, a value of 50 is recommended. Will increase power usage. 
Default: 0 (off) -low_latency_poll +busy_poll ---------------- Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL) Approximate time in us to busy loop waiting for events. Recommended value depends on the number of sockets you poll on. For several sockets 50, for several hundreds 100. For more than that you probably want to use epoll. -Note that only sockets with SO_LL set will be busy polled, so you want to either -selectively set SO_LL on those sockets or set sysctl.net.low_latency_read globally. +Note that only sockets with SO_BUSY_POLL set will be busy polled, +so you want to either selectively set SO_BUSY_POLL on those sockets or set +sysctl.net.busy_read globally. Will increase power usage. Default: 0 (off) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 4885825e498..467de010ea7 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -81,6 +81,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 79b61798ebf..11c4259c62f 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -74,6 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 47b1ec55092..eb723e51554 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -76,7 +76,7 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index dbc08520f22..f0cb1c34116 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -74,7 +74,7 @@ #define 
SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index a38d38a6520..9490758c5e2 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -74,6 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index d3358b76068..556d0701a15 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -83,6 +83,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 44aaf4639a4..24be7c8da86 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -74,6 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 6a07992ba6c..61c01f054d1 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -92,6 +92,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index db80fd3e398..e2a2b203eb0 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -74,6 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index f866fff9a00..71700e636a8 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -73,7 +73,7 @@ 
#define SO_SELECT_ERR_QUEUE 0x4026 -#define SO_LL 0x4027 +#define SO_BUSY_POLL 0x4027 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 405fb09bda9..a6d74467c9e 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -81,6 +81,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 0c5105fbaaf..92494494692 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -80,6 +80,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index b46c3fa0b26..4e1d66c3ce7 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -70,7 +70,7 @@ #define SO_SELECT_ERR_QUEUE 0x0029 -#define SO_LL 0x0030 +#define SO_BUSY_POLL 0x0030 /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index b21ace4fc9b..c114483010c 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -85,6 +85,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 4ff71908fd4..a14339c2985 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -30,8 +30,8 @@ #ifdef CONFIG_NET_LL_RX_POLL struct napi_struct; -extern unsigned int sysctl_net_ll_read __read_mostly; -extern unsigned int sysctl_net_ll_poll __read_mostly; 
+extern unsigned int sysctl_net_busy_read __read_mostly; +extern unsigned int sysctl_net_busy_poll __read_mostly; /* return values from ndo_ll_poll */ #define LL_FLUSH_FAILED -1 @@ -39,7 +39,7 @@ extern unsigned int sysctl_net_ll_poll __read_mostly; static inline bool net_busy_loop_on(void) { - return sysctl_net_ll_poll; + return sysctl_net_busy_poll; } /* a wrapper to make debug_smp_processor_id() happy @@ -72,7 +72,7 @@ static inline unsigned long sk_busy_loop_end_time(struct sock *sk) /* in poll/select we use the global sysctl_net_ll_poll value */ static inline unsigned long busy_loop_end_time(void) { - return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_ll_poll); + return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(struct sock *sk) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index ca3a20d772a..f04b69b6abf 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -76,6 +76,6 @@ #define SO_SELECT_ERR_QUEUE 45 -#define SO_LL 46 +#define SO_BUSY_POLL 46 #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 9bfe83f4d67..548d716c5f6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -901,7 +901,7 @@ set_rcvbuf: break; #ifdef CONFIG_NET_LL_RX_POLL - case SO_LL: + case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) ret = -EPERM; @@ -1171,7 +1171,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #ifdef CONFIG_NET_LL_RX_POLL - case SO_LL: + case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; #endif @@ -2294,7 +2294,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_ll_read; + sk->sk_ll_usec = sysctl_net_busy_read; #endif /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 1a298cb3dae..66096861663 
100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -300,15 +300,15 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_NET_FLOW_LIMIT */ #ifdef CONFIG_NET_LL_RX_POLL { - .procname = "low_latency_poll", - .data = &sysctl_net_ll_poll, + .procname = "busy_poll", + .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec }, { - .procname = "low_latency_read", - .data = &sysctl_net_ll_read, + .procname = "busy_read", + .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec diff --git a/net/socket.c b/net/socket.c index 6a3e9a3f50a..829b460acb8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,8 +107,8 @@ #include #ifdef CONFIG_NET_LL_RX_POLL -unsigned int sysctl_net_ll_read __read_mostly; -unsigned int sysctl_net_ll_poll __read_mostly; +unsigned int sysctl_net_busy_read __read_mostly; +unsigned int sysctl_net_busy_poll __read_mostly; #endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -- cgit v1.2.3-70-g09d2 From 1eb4f758286884e7566627164bca4c4a16952a83 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 10 Jul 2013 23:00:57 +0200 Subject: ipv6: in case of link failure remove route directly instead of letting it expire We could end up expiring a route which is part of an ecmp route set. Doing so would invalidate the rt->rt6i_nsiblings calculations and could provoke the following panic: [ 80.144667] ------------[ cut here ]------------ [ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733! 
[ 80.145172] invalid opcode: 0000 [#1] SMP [ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables +snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk [ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118 [ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000 [ 80.145172] RIP: 0010:[] [] fib6_add+0x75d/0x830 [ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202 [ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480 [ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738 [ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001 [ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680 [ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90 [ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 [ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0 [ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 80.145172] Stack: [ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280 [ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa [ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680 [ 80.145172] Call Trace: [ 80.145172] [] ? 
rt6_bind_peer+0x4b/0x90 [ 80.145172] [] __ip6_ins_rt+0x45/0x70 [ 80.145172] [] ip6_ins_rt+0x35/0x40 [ 80.145172] [] ip6_pol_route.isra.44+0x3a4/0x4b0 [ 80.145172] [] ip6_pol_route_output+0x2a/0x30 [ 80.145172] [] fib6_rule_action+0xd7/0x210 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] fib_rules_lookup+0xc6/0x140 [ 80.145172] [] fib6_rule_lookup+0x44/0x80 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] ip6_route_output+0x73/0xb0 [ 80.145172] [] ip6_dst_lookup_tail+0x2c3/0x2e0 [ 80.145172] [] ? list_del+0x11/0x40 [ 80.145172] [] ? remove_wait_queue+0x3c/0x50 [ 80.145172] [] ip6_dst_lookup_flow+0x3d/0xa0 [ 80.145172] [] rawv6_sendmsg+0x267/0xc20 [ 80.145172] [] inet_sendmsg+0x63/0xb0 [ 80.145172] [] ? selinux_socket_sendmsg+0x23/0x30 [ 80.145172] [] sock_sendmsg+0xa6/0xd0 [ 80.145172] [] SYSC_sendto+0x128/0x180 [ 80.145172] [] ? update_curr+0xec/0x170 [ 80.145172] [] ? kvm_clock_get_cycles+0x9/0x10 [ 80.145172] [] ? __getnstimeofday+0x3e/0xd0 [ 80.145172] [] SyS_sendto+0xe/0x10 [ 80.145172] [] system_call_fastpath+0x16/0x1b [ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53 [ 80.145172] RIP [] fib6_add+0x75d/0x830 [ 80.145172] RSP [ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]--- [ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt Cc: Nicolas Dichtel Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bd5fd705403..5b127e09c22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1080,10 +1080,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } -- cgit v1.2.3-70-g09d2 From 110ecd69a9feea82a152bbf9b12aba57e6396883 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 11 Jul 2013 13:16:54 -0400 Subject: 9p: fix off by one causing access violations and memory corruption p9_release_pages() would attempt to dereference one value past the end of pages[]. 
This would cause the following crashes: [ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000 [ 6293.174146] IP: [] p9_release_pages+0x3b/0x60 [ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060 [ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 6293.180060] Modules linked in: [ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954 [ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000 [ 6293.180060] RIP: 0010:[] [] p9_release_pages+0x3b/0x60 [ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202 [ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000 [ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40 [ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000 [ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 [ 6293.222017] R13: 0000000000000001 R14: ffff8807cc50c070 R15: ffff8807cc50c070 [ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000 [ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0 [ 6293.256784] Stack: [ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8 [ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001 [ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000 [ 6293.256784] Call Trace: [ 6293.256784] [] p9_virtio_zc_request+0x598/0x630 [ 6293.256784] [] ? wake_up_bit+0x40/0x40 [ 6293.256784] [] p9_client_zc_rpc+0x111/0x3a0 [ 6293.256784] [] ? sched_clock_cpu+0x108/0x120 [ 6293.256784] [] p9_client_read+0xe1/0x2c0 [ 6293.256784] [] v9fs_file_read+0x90/0xc0 [ 6293.256784] [] vfs_read+0xc3/0x130 [ 6293.256784] [] ? 
trace_hardirqs_on+0xd/0x10 [ 6293.256784] [] SyS_read+0x62/0xa0 [ 6293.256784] [] tracesys+0xdd/0xe2 [ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c [ 6293.256784] RIP [] p9_release_pages+0x3b/0x60 [ 6293.256784] RSP [ 6293.256784] CR2: ffff8807c96f3000 [ 6293.256784] ---[ end trace 50822ee72cd360fc ]--- Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/9p/trans_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867..2ee3879161b 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); -- cgit v1.2.3-70-g09d2 From afc154e978de1eb11c555bc8bcec1552f75ebc43 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 11 Jul 2013 12:43:42 +0200 Subject: ipv6: fix route selection if kernel is not compiled with CONFIG_IPV6_ROUTER_PREF This is a follow-up patch to 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ("ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available"). Since the removal of rt->n in rt6_info we can end up with a dst == NULL in rt6_check_neigh. In case the kernel is not compiled with CONFIG_IPV6_ROUTER_PREF we should also select a route with unknown NUD state but we must not avoid doing round robin selection on routes with the same target. So introduce and pass down a boolean ``do_rr'' to indicate when we should update rt->rr_ptr. As soon as no route is valid we do backtracking and do a lookup on a higher level in the fib trie. 
v2: a) Improved rt6_check_neigh logic (no need to create neighbour there) and documented return values. v3: a) Introduce enum rt6_nud_state to get rid of the magic numbers (thanks to David Miller). b) Update and shorten commit message a bit to actualy reflect the source. Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 63 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5b127e09c22..a8c891aa246 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -531,28 +537,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); - } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { - ret = true; + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -566,43 +573,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; + } + + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); + *do_rr = match_do_rr; *mpri = m; match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } - out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -610,10 +626,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ 
-622,15 +638,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ -- cgit v1.2.3-70-g09d2 From 3b8ccd447375acebed9af0a3798e1ab4e58bedf4 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Thu, 11 Jul 2013 09:55:51 +0300 Subject: inet: fix spacing in assignment Found using checkpatch.pl Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6af375afeee..7bd8983dbfc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -467,7 +467,7 @@ void inet_unhash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); - done =__sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); -- cgit v1.2.3-70-g09d2 From cdbaa0bb26d8116d00be24e6b49043777b382f3a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Jul 2013 17:05:06 -0700 Subject: gso: Update tunnel segmentation to support Tx checksum offload This change makes it so that the GRE and VXLAN tunnels can make use of Tx checksum offload support provided by some drivers via the hw_enc_features. 
Without this fix, enabling GSO means sacrificing Tx checksum offload and this actually leads to a performance regression as shown below: Utilization Send Throughput local GSO 10^6bits/s % S state 6276.51 8.39 enabled 7123.52 8.42 disabled To resolve this it was necessary to address two items. First netif_skb_features needed to be updated so that it would correctly handle the Trans Ether Bridging protocol without impacting the need to check for Q-in-Q tagging. To do this it was necessary to update harmonize_features so that it used skb_network_protocol instead of just using the outer protocol. Second it was necessary to update the GRE and UDP tunnel segmentation offloads so that they would reset the encapsulation bit and inner header offsets after the offload was complete. As a result of this change I have seen the following results on an interface with Tx checksum enabled for encapsulated frames: Utilization Send Throughput local GSO 10^6bits/s % S state 7123.52 8.42 disabled 8321.75 5.43 enabled v2: Instead of replacing reference to skb->protocol with skb_network_protocol just replace the protocol reference in harmonize_features to allow for double VLAN tag checks. Signed-off-by: Alexander Duyck Signed-off-by: David S.
Miller --- net/core/dev.c | 14 ++++++-------- net/ipv4/gre_offload.c | 3 +++ net/ipv4/udp.c | 4 +++- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 560dafd83ad..a3d8d44cb7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2481,10 +2481,10 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - __be16 protocol, netdev_features_t features) + netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, protocol)) { + !can_checksum_protocol(features, skb_network_protocol(skb))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; @@ -2505,20 +2505,18 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, features); } features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { - return harmonize_features(skb, protocol, features); - } else { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, protocol, features); - } + + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 775d5b532ec..55e6bfb3a28 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -100,6 +100,9 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } __skb_push(skb, tnl_hlen - ghl); + skb_reset_inner_headers(skb); 
+ skb->encapsulation = 1; + skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a0d7151ffbd..766e6bab911 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2323,6 +2323,9 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct udphdr *uh; int udp_offset = outer_hlen - tnl_hlen; + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + skb->mac_len = mac_len; skb_push(skb, outer_hlen); @@ -2345,7 +2348,6 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, uh->check = CSUM_MANGLED_0; } - skb->ip_summed = CHECKSUM_NONE; skb->protocol = protocol; } while ((skb = skb->next)); out: -- cgit v1.2.3-70-g09d2 From 87f1369d6e2e820c77cf9eac542eed4dcf036f64 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Jul 2013 15:46:08 +0200 Subject: pkt_sched: sch_qfq: improve efficiency of make_eligible In make_eligible, a mask is used to decide which groups must become eligible: the i-th group becomes eligible only if the i-th bit of the mask (from the right) is set. The mask is computed by left-shifting a 1 by a given number of places, and decrementing the result. The shift is performed on a ULL to avoid problems in case the number of places to shift is higher than 31. On a 32-bit machine, this is more costly than working on an UL. This patch replaces such a costly operation with two cheaper branches. The trick is based on the following fact: in case of a shift of at least 32 places, the resulting mask has at least the 32 less significant bits set, whereas the total number of groups is lower than 32. As a consequence, in this case it is enough to just set the 32 less significant bits of the mask with a cheaper ~0UL. In the other case, the shift can be safely performed on a UL. Reported-by: David S. Miller Reported-by: David Laight Signed-off-by: Paolo Valente Signed-off-by: David S. 
Miller --- net/sched/sch_qfq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 7c195d972bf..8d86a8b5522 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -821,7 +821,14 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask; + int last_flip_pos = fls(vslot ^ old_vslot); + + if (last_flip_pos > 31) /* higher than the number of groups */ + mask = ~0UL; /* make all groups eligible */ + else + mask = (1UL << last_flip_pos) - 1; + qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } -- cgit v1.2.3-70-g09d2 From 88d4f419a43b474a4524f41f55c36bee13416bdd Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Jul 2013 15:46:09 +0200 Subject: pkt_sched: sch_qfq: remove forward declaration of qfq_update_agg_ts This patch removes the forward declaration of qfq_update_agg_ts, by moving the definition of the function above its first call. This patch also removes a useless forward declaration of qfq_schedule_agg. Reported-by: David S. Miller Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 118 ++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d86a8b5522..a7ab323849b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1010,9 +1010,61 @@ static inline void charge_actual_service(struct qfq_aggregate *agg) agg->F = agg->S + (u64)service_received * agg->inv_w; } -static inline void qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, - enum update_reason reason); +/* Assign a reasonable start time for a new aggregate in group i. 
+ * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in EB (see [2]). So, if we have groups in ER, + * set S to the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. + */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) +{ + unsigned long mask; + u64 limit, roundedF; + int slot_shift = agg->grp->slot_shift; + + roundedF = qfq_round_down(agg->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); + + if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], agg->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + if (qfq_gt(limit, next->F)) + agg->S = next->F; + else /* preserve timestamp correctness */ + agg->S = limit; + return; + } + } + agg->S = q->V; + } else /* timestamp is not stale */ + agg->S = agg->F; +} + +/* Update the timestamps of agg before scheduling/rescheduling it for + * service. In particular, assign to agg->F its maximum possible + * value, i.e., the virtual finish time with which the aggregate + * should be labeled if it used all its budget once in service. 
+ */ +static inline void +qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, enum update_reason reason) +{ + if (reason != requeue) + qfq_update_start(q, agg); + else /* just charge agg for the service received */ + agg->S = agg->F; + + agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; +} static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); @@ -1135,66 +1187,6 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -/* - * Assign a reasonable start time for a new aggregate in group i. - * Admissible values for \hat(F) are multiples of \sigma_i - * no greater than V+\sigma_i . Larger values mean that - * we had a wraparound so we consider the timestamp to be stale. - * - * If F is not stale and F >= V then we set S = F. - * Otherwise we should assign S = V, but this may violate - * the ordering in EB (see [2]). So, if we have groups in ER, - * set S to the F_j of the first group j which would be blocking us. - * We are guaranteed not to move S backward because - * otherwise our group i would still be blocked. - */ -static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) -{ - unsigned long mask; - u64 limit, roundedF; - int slot_shift = agg->grp->slot_shift; - - roundedF = qfq_round_down(agg->F, slot_shift); - limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); - - if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { - /* timestamp was stale */ - mask = mask_from(q->bitmaps[ER], agg->grp->index); - if (mask) { - struct qfq_group *next = qfq_ffs(q, mask); - if (qfq_gt(roundedF, next->F)) { - if (qfq_gt(limit, next->F)) - agg->S = next->F; - else /* preserve timestamp correctness */ - agg->S = limit; - return; - } - } - agg->S = q->V; - } else /* timestamp is not stale */ - agg->S = agg->F; -} - -/* - * Update the timestamps of agg before scheduling/rescheduling it for - * service. 
In particular, assign to agg->F its maximum possible - * value, i.e., the virtual finish time with which the aggregate - * should be labeled if it used all its budget once in service. - */ -static inline void -qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, enum update_reason reason) -{ - if (reason != requeue) - qfq_update_start(q, agg); - else /* just charge agg for the service received */ - agg->S = agg->F; - - agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; -} - -static void qfq_schedule_agg(struct qfq_sched *, struct qfq_aggregate *); - static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); -- cgit v1.2.3-70-g09d2 From 8c91e162e058bb91b7766f26f4d5823a21941026 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Jul 2013 13:12:22 -0700 Subject: gre: Fix MTU sizing check for gretap tunnels This change fixes an MTU sizing issue seen with gretap tunnels when non-gso packets are sent from the interface. In my case I was able to reproduce the issue by simply sending a ping of 1421 bytes with the gretap interface created on a device with a standard 1500 mtu. This fix is based on the fact that the tunnel mtu is already adjusted by dev->hard_header_len so it would make sense that any packets being compared against that mtu should also be adjusted by hard_header_len and the tunnel header instead of just the tunnel header. Signed-off-by: Alexander Duyck Reported-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 945734b2f20..ca1cb2d5f6e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -476,7 +476,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df) { struct ip_tunnel *tunnel = netdev_priv(dev); - int pkt_size = skb->len - tunnel->hlen; + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; int mtu; if (df) -- cgit v1.2.3-70-g09d2 From d77e41e12744e53ca7f98f920350998b5f00c93a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Jul 2013 17:30:34 +0300 Subject: net/tipc: use %*phC to dump small buffers in hex form Instead of passing each byte by stack let's use nice specifier for that. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- net/tipc/ib_media.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index ad2e1ec4117..9934a32bfa8 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -292,13 +292,7 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ return 1; - sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - a->value[0], a->value[1], a->value[2], a->value[3], - a->value[4], a->value[5], a->value[6], a->value[7], - a->value[8], a->value[9], a->value[10], a->value[11], - a->value[12], a->value[13], a->value[14], a->value[15], - a->value[16], a->value[17], a->value[18], a->value[19]); + sprintf(str_buf, "%20phC", a->value); return 0; } -- cgit v1.2.3-70-g09d2 From 92338dc2fb33c8526256a458a520af73d9ab2d14 Mon Sep 17 00:00:00 2001 From: “Cosmin Date: Fri, 12 Jul 2013 09:33:33 +0300 Subject: net: strict_strtoul is obsolete, use kstrtoul instead patch 
found using checkpatch.pl Signed-off-by: Cosmin Stanescu Signed-off-by: David S. Miller --- net/dns_resolver/dns_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 0a69d075779..f347a2ca7d7 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -118,7 +118,7 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) if (opt_vlen <= 0) goto bad_option_value; - ret = strict_strtoul(eq, 10, &derrno); + ret = kstrtoul(eq, 10, &derrno); if (ret < 0) goto bad_option_value; -- cgit v1.2.3-70-g09d2 From 40dadff26539d1695d2a37b44f66c53158439ae9 Mon Sep 17 00:00:00 2001 From: Sunghan Suh Date: Fri, 12 Jul 2013 16:17:23 +0900 Subject: net: access page->private by using page_private Signed-off-by: Sunghan Suh Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 724bb7cb173..20e02d2605e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -824,7 +824,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) page = alloc_page(gfp_mask); if (!page) { while (head) { - struct page *next = (struct page *)head->private; + struct page *next = (struct page *)page_private(head); put_page(head); head = next; } @@ -834,7 +834,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) memcpy(page_address(page), vaddr + f->page_offset, skb_frag_size(f)); kunmap_atomic(vaddr); - page->private = (unsigned long)head; + set_page_private(page, (unsigned long)head); head = page; } @@ -848,7 +848,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = num_frags - 1; i >= 0; i--) { __skb_fill_page_desc(skb, i, head, 0, skb_shinfo(skb)->frags[i].size); - head = (struct page *)head->private; + head = (struct page *)page_private(head); } skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; -- cgit 
v1.2.3-70-g09d2 From 24ab6bec80861d0c55263047e8bf97e460a32e7b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 12 Jul 2013 11:33:04 -0700 Subject: tcp: account all retransmit failures Change snmp RETRANSFAILS stat to include timeout retransmit failures in addition to other loss recoveries. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3d609490f11..92fde8d1aa8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2407,6 +2407,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; + } else { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } return err; } @@ -2528,10 +2530,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + if (tcp_retransmit_skb(sk, skb)) return; - } + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (tcp_in_cwnd_reduction(sk)) -- cgit v1.2.3-70-g09d2 From 307f2fb95e9b96b3577916e73d92e104f8f26494 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Jul 2013 23:46:33 +0200 Subject: ipv6: only static routes qualify for equal cost multipathing Static routes in this case are non-expiring routes which did not get configured by autoconf or by icmpv6 redirects. To make sure we actually get an ecmp route while searching for the first one in this fib6_node's leafs, also make sure it matches the ecmp route assumptions. v2: a) Removed RTF_EXPIRE check in dst.from chain. 
The check of RTF_ADDRCONF already ensures that this route, even if added again without RTF_EXPIRES (in case of an RA announcement with infinite timeout), does not cause the rt6i_nsiblings logic to go wrong if a later RA updates the expiration time later. v3: a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so, because a pmtu event could update the RTF_EXPIRES flag and we would not count this route, if another route joins this set. We now filter only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that don't get changed after rt6_info construction. Cc: Nicolas Dichtel Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e18..5fc9c7a68d8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway.
*/ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; -- cgit v1.2.3-70-g09d2