From 932ff279a43ab7257942cddff2595acd541cc49b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 12:20:56 -0700 Subject: [NET]: Add netif_tx_lock Various drivers use xmit_lock internally to synchronise with their transmission routines. They do so without setting xmit_lock_owner. This is fine as long as netpoll is not in use. With netpoll it is possible for deadlocks to occur if xmit_lock_owner isn't set. This is because if a printk occurs while xmit_lock is held and xmit_lock_owner is not set can cause netpoll to attempt to take xmit_lock recursively. While it is possible to resolve this by getting netpoll to use trylock, it is suboptimal because netpoll's sole objective is to maximise the chance of getting the printk out on the wire. So delaying or dropping the message is to be avoided as much as possible. So the only alternative is to always set xmit_lock_owner. The following patch does this by introducing the netif_tx_lock family of functions that take care of setting/unsetting xmit_lock_owner. I renamed xmit_lock to _xmit_lock to indicate that it should not be used directly. I didn't provide irq versions of the netif_tx_lock functions since xmit_lock is meant to be a BH-disabling lock. This is pretty much a straight text substitution except for a small bug fix in winbond. It currently uses netif_stop_queue/spin_unlock_wait to stop transmission. This is unsafe as an IRQ can potentially wake up the queue. So it is safer to use netif_tx_disable. The hamradio bits used spin_lock_irq but it is unnecessary as xmit_lock must never be taken in an IRQ handler. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- Documentation/networking/netdevices.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation/networking') diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 3c0a5ba614d..847cedb238f 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -42,9 +42,9 @@ dev->get_stats: Context: nominally process, but don't sleep inside an rwlock dev->hard_start_xmit: - Synchronization: dev->xmit_lock spinlock. + Synchronization: netif_tx_lock spinlock. When the driver sets NETIF_F_LLTX in dev->features this will be - called without holding xmit_lock. In this case the driver + called without holding netif_tx_lock. In this case the driver has to lock by itself when needed. It is recommended to use a try lock for this and return -1 when the spin lock fails. The locking there should also properly protect against @@ -62,12 +62,12 @@ dev->hard_start_xmit: Only valid when NETIF_F_LLTX is set. dev->tx_timeout: - Synchronization: dev->xmit_lock spinlock. + Synchronization: netif_tx_lock spinlock. Context: BHs disabled Notes: netif_queue_stopped() is guaranteed true dev->set_multicast_list: - Synchronization: dev->xmit_lock spinlock. + Synchronization: netif_tx_lock spinlock. Context: BHs disabled dev->poll: -- cgit v1.2.3-70-g09d2 From 35089bb203f44e33b6bbb6c4de0b0708f9a48921 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Jun 2006 22:33:04 -0700 Subject: [TCP]: Add tcp_slow_start_after_idle sysctl. A lot of people have asked for a way to disable tcp_cwnd_restart(), and it seems reasonable to add a sysctl to do that. Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/linux/sysctl.h | 1 + include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp_output.c | 6 +++++- 5 files changed, 22 insertions(+), 1 deletion(-) (limited to 'Documentation/networking') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f12007b80a4..d46338af600 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -362,6 +362,13 @@ tcp_workaround_signed_windows - BOOLEAN not receive a window scaling option from them. Default: 0 +tcp_slow_start_after_idle - BOOLEAN + If set, provide RFC2861 behavior and time out the congestion + window after an idle period. An idle period is defined at + the current RTO. If unset, the congestion window will not + be timed out after an idle period. + Default: 1 + IP Variables: ip_local_port_range - 2 INTEGERS diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 98338ed2c0b..cee944dbdcd 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -405,6 +405,7 @@ enum NET_TCP_BASE_MSS=114, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, + NET_TCP_SLOW_START_AFTER_IDLE=117, }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index de88c5472bf..bfc71f954bb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,6 +227,7 @@ extern int sysctl_tcp_abc; extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; +extern int sysctl_tcp_slow_start_after_idle; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e7fb665873c..ce4cd5f3551 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -690,6 +690,14 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_dointvec }, #endif + { + .ctl_name = NET_TCP_SLOW_START_AFTER_IDLE, + .procname = "tcp_slow_start_after_idle", + .data = &sysctl_tcp_slow_start_after_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f33c9dddaa1..07bb5a2b375 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,9 @@ int sysctl_tcp_tso_win_divisor = 3; int sysctl_tcp_mtu_probing = 0; int sysctl_tcp_base_mss = 512; +/* By default, RFC2861 behavior. */ +int sysctl_tcp_slow_start_after_idle = 1; + static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { @@ -138,7 +141,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) + if (sysctl_tcp_slow_start_after_idle && + (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) tcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; -- cgit v1.2.3-70-g09d2