From 09cb105ea78d5644570d52085e2149f784575872 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 3 Nov 2008 00:27:11 -0800 Subject: net: clean up net/ipv4/ip_sockglue.c tcp_output.c Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d883189..a524627923a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -42,7 +42,7 @@ /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; -/* People can turn this on to work with those rare, broken TCPs that +/* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; @@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { @@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { @@ -1172,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, static inline int tcp_minshall_check(const struct tcp_sock *tp) { - return after(tp->snd_sml,tp->snd_una) && + return after(tp->snd_sml, tp->snd_una) && !after(tp->snd_sml, tp->snd_nxt); } -- cgit v1.2.3-70-g09d2 From 4a17fc3add594fcc1c778e93a95b6ecf47f630e5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:03:43 -0800 Subject: tcp: collapse more than two on retransmission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I always had thought that collapsing up to two at a time was intentional decision to avoid excessive processing if 1 byte sized skbs are to be combined for a full mtu, and consecutive retransmissions would make the size of the retransmittee double each round anyway, but some recent discussion made me to understand that was not the case. Thus make collapse work more and wait less. It would be possible to take advantage of the shifting machinery (added in the later patch) in the case of paged data but that can be implemented on top of this change. tcp_skb_is_last check is now provided by the loop. I tested a bit (ss-after-idle-off, fill 4096x4096B xfer, 10s sleep + 4096 x 1byte writes while dropping them for some a while with netem): . 16774097:16775545(1448) ack 1 win 46 . 16775545:16776993(1448) ack 1 win 46 . ack 16759617 win 2399 P 16776993:16777217(224) ack 1 win 46 . ack 16762513 win 2399 . ack 16765409 win 2399 . ack 16768305 win 2399 . ack 16771201 win 2399 . ack 16774097 win 2399 . ack 16776993 win 2399 . ack 16777217 win 2399 P 16777217:16777257(40) ack 1 win 46 . ack 16777257 win 2399 P 16777257:16778705(1448) ack 1 win 46 P 16778705:16780153(1448) ack 1 win 46 FP 16780153:16781313(1160) ack 1 win 46 . ack 16778705 win 2399 . ack 16780153 win 2399 F 1:1(0) ack 16781314 win 2399 While without drop-all period I get this: . 16773585:16775033(1448) ack 1 win 46 . ack 16764897 win 9367 . ack 16767793 win 9367 . ack 16770689 win 9367 . ack 16773585 win 9367 . 16775033:16776481(1448) ack 1 win 46 P 16776481:16777217(736) ack 1 win 46 . ack 16776481 win 9367 . ack 16777217 win 9367 P 16777217:16777218(1) ack 1 win 46 P 16777218:16777219(1) ack 1 win 46 P 16777219:16777220(1) ack 1 win 46 ... P 16777247:16777248(1) ack 1 win 46 . ack 16777218 win 9367 . ack 16777219 win 9367 ... . ack 16777233 win 9367 . ack 16777248 win 9367 P 16777248:16778696(1448) ack 1 win 46 P 16778696:16780144(1448) ack 1 win 46 FP 16780144:16781313(1169) ack 1 win 46 . ack 16780144 win 9367 F 1:1(0) ack 16781314 win 9367 The window seems to be 30-40 segments, which were successfully combined into: P 16777217:16777257(40) ack 1 win 46 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 96 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 37 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a524627923a..86ef98975e9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1766,46 +1766,22 @@ u32 __tcp_select_window(struct sock *sk) return window; } -/* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, - int mss_now) +/* Collapses two adjacent SKB's during retransmission. */ +static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); int skb_size, next_skb_size; u16 flags; - /* The first test we must make is that neither of these two - * SKB's are still referenced by someone else. - */ - if (skb_cloned(skb) || skb_cloned(next_skb)) - return; - skb_size = skb->len; next_skb_size = next_skb->len; flags = TCP_SKB_CB(skb)->flags; - /* Also punt if next skb has been SACK'd. */ - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) - return; - - /* Next skb is out of window. */ - if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) - return; - - /* Punt if not enough space exists in the first SKB for - * the data in the second, or the total combined payload - * would exceed the MSS. - */ - if ((next_skb_size > skb_tailroom(skb)) || - ((skb_size + next_skb_size) > mss_now)) - return; - BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); tcp_highest_sack_combine(sk, next_skb, skb); - /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), @@ -1847,6 +1823,62 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, sk_wmem_free_skb(sk, next_skb); } +static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_skb_pcount(skb) > 1) + return 0; + /* TODO: SACK collapsing could be used to remove this condition */ + if (skb_shinfo(skb)->nr_frags != 0) + return 0; + if (skb_cloned(skb)) + return 0; + if (skb == tcp_send_head(sk)) + return 0; + /* Some heurestics for collapsing over SACK'd could be invented */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + return 0; + + return 1; +} + +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, + int space) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb = to, *tmp; + int first = 1; + + if (!sysctl_tcp_retrans_collapse) + return; + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + return; + + tcp_for_write_queue_from_safe(skb, tmp, sk) { + if (!tcp_can_collapse(sk, skb)) + break; + + space -= skb->len; + + if (first) { + first = 0; + continue; + } + + if (space < 0) + break; + /* Punt if not enough space exists in the first SKB for + * the data in the second + */ + if (skb->len > skb_tailroom(to)) + break; + + if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) + break; + + tcp_collapse_retrans(sk, to); + } +} + /* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. @@ -1946,17 +1978,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; /* We'll try again later. */ } - /* Collapse two adjacent packets if worthwhile and we can. */ - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (cur_mss >> 1)) && - (!tcp_skb_is_last(sk, skb)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && - (skb_shinfo(skb)->nr_frags == 0 && - skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && - tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && - (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, cur_mss); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off -- cgit v1.2.3-70-g09d2 From e1aa680fa40e7492260a09cb57d94002245cc8fe Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:11:55 -0800 Subject: tcp: move tcp_simple_retransmit to tcp_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_output.c | 50 ------------------------------------------------ 3 files changed, 52 insertions(+), 53 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f26b28fb40..90b4c3b4c33 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,8 +472,6 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); -extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, - struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 097294b7da3..8085704863f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1002,7 +1002,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -2559,6 +2560,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk, 0); + u32 prior_lost = tp->lost_out; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (skb->len > mss && + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + } + tcp_skb_mark_lost_uncond_verify(tp, skb); + } + } + + tcp_clear_retrans_hints_partial(tp); + + if (prior_lost == tp->lost_out) + return; + + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + + tcp_verify_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 86ef98975e9..c069ecb81ea 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1879,56 +1879,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, } } -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. - */ -void tcp_simple_retransmit(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); - u32 prior_lost = tp->lost_out; - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_skb_mark_lost_uncond_verify(tp, skb); - } - } - - tcp_clear_retrans_hints_partial(tp); - - if (prior_lost == tp->lost_out) - return; - - if (tcp_is_reno(tp)) - tcp_limit_reno_sacked(tp); - - tcp_verify_left_out(tp); - - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); - } - tcp_xmit_retransmit_queue(sk); -} - /* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. -- cgit v1.2.3-70-g09d2 From 726e07a8a38168266ac95d87736f9501a2d9e7b2 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:43:56 -0800 Subject: tcp: move some parts from tcp_write_xmit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 76f840917bc..80147ba4414 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1530,13 +1530,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) int cwnd_quota; int result; - /* If we are closed, the bytes will have to remain here. - * In time closedown will finish, we empty the write queue and all - * will be happy. - */ - if (unlikely(sk->sk_state == TCP_CLOSE)) - return 0; - sent_pkts = 0; /* Do MTU probing. */ @@ -1608,10 +1601,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, { struct sk_buff *skb = tcp_send_head(sk); - if (skb) { - if (tcp_write_xmit(sk, cur_mss, nonagle)) - tcp_check_probe_timer(sk); - } + if (!skb) + return; + + /* If we are closed, the bytes will have to remain here. + * In time closedown will finish, we empty the write queue and + * all will be happy. + */ + if (unlikely(sk->sk_state == TCP_CLOSE)) + return; + + if (tcp_write_xmit(sk, cur_mss, nonagle)) + tcp_check_probe_timer(sk); } /* Send _single_ skb sitting at the send head. This function requires -- cgit v1.2.3-70-g09d2 From d5dd9175bc12015ea4d2c1a9b6b15dfa645a3db9 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:48:55 -0800 Subject: tcp: use tcp_write_xmit also in tcp_push_one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_minshall_update is not significant difference since it only checks for not full-sized skb which is BUG'ed on the push_one path anyway. tcp_snd_test is tcp_nagle_test+tcp_cwnd_test+tcp_snd_wnd_test, just the order changed slightly. net/ipv4/tcp_output.c: tcp_snd_test | -89 tcp_mss_split_point | -91 tcp_may_send_now | +53 tcp_cwnd_validate | -98 tso_fragment | -239 __tcp_push_pending_frames | -1340 tcp_push_one | -146 7 functions changed, 53 bytes added, 2003 bytes removed, diff: -1950 net/ipv4/tcp_output.c: tcp_write_xmit | +1772 1 function changed, 1772 bytes added, diff: +1772 tcp_output.o.new: 8 functions changed, 1825 bytes added, 2003 bytes removed, diff: -178 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 54 ++++++++++++++++----------------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 59505ce0c8f..e65c114feea 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1519,7 +1519,8 @@ static int tcp_mtu_probe(struct sock *sk) * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ -static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) +static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1529,11 +1530,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 0; - /* Do MTU probing. */ - if ((result = tcp_mtu_probe(sk)) == 0) { - return 0; - } else if (result > 0) { - sent_pkts = 1; + if (!push_one) { + /* Do MTU probing. */ + result = tcp_mtu_probe(sk); + if (!result) { + return 0; + } else if (result > 0) { + sent_pkts = 1; + } } while ((skb = tcp_send_head(sk))) { @@ -1555,7 +1559,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (tcp_tso_should_defer(sk, skb)) + if (!push_one && tcp_tso_should_defer(sk, skb)) break; } @@ -1570,7 +1574,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; /* Advance the send_head. This one is sent out. @@ -1580,6 +1584,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_minshall_update(tp, mss_now, skb); sent_pkts++; + + if (push_one) + break; } if (likely(sent_pkts)) { @@ -1608,7 +1615,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) tcp_check_probe_timer(sk); } @@ -1617,38 +1624,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); - unsigned int tso_segs, cwnd_quota; BUG_ON(!skb || skb->len < mss_now); - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); - cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); - - if (likely(cwnd_quota)) { - unsigned int limit; - - BUG_ON(!tso_segs); - - limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) - limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); - - if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - - /* Send it out now. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - - if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { - tcp_event_new_data_sent(sk, skb); - tcp_cwnd_validate(sk); - return; - } - } + tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); } /* This function returns the amount that we can raise the -- cgit v1.2.3-70-g09d2 From a2acde07711f7d8b19928245c555bce60f91482a Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 5 Dec 2008 22:49:37 -0800 Subject: tcp: fix tso_should_defer in 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since jiffies is unsigned long, the types get expanded into that and after long enough time the difference will therefore always be > 1 (and that probably happens near boot as well as iirc the first jiffies wrap is scheduler close after boot to find out problems related to that early). This was originally noted by Bill Fink in Dec'07 but nobody never ended fixing it. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e65c114feea..dda42f0bd7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1334,7 +1334,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* Defer for less than two clock ticks. */ if (tp->tso_deferred && - ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) goto send_now; in_flight = tcp_packets_in_flight(tp); -- cgit v1.2.3-70-g09d2 From 64ff3b938ec6782e6585a83d5459b98b0c3f6eb8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 25 Dec 2008 17:12:58 -0800 Subject: tcp: Always set urgent pointer if it's beyond snd_nxt Our TCP stack does not set the urgent flag if the urgent pointer does not fit in 16 bits, i.e., if it is more than 64K from the sequence number of a packet. This behaviour is different from the BSDs, and clearly contradicts the purpose of urgent mode, which is to send the notification (though not necessarily the associated data) as soon as possible. Our current behaviour may in fact delay the urgent notification indefinitely if the receiver window does not open up. Simply matching BSD however may break legacy applications which incorrectly rely on the out-of-band delivery of urgent data, and conversely the in-band delivery of non-urgent data. Alexey Kuznetsov suggested a safe solution of following BSD only if the urgent pointer itself has not yet been transmitted. This way we guarantee that when the remote end sees the packet with non-urgent data marked as urgent due to wrap-around we would have advanced the urgent pointer beyond, either to the actual urgent data or to an as-yet untransmitted packet. The only potential downside is that applications on the remote end may see multiple SIGURG notifications. However, this would occur anyway with other TCP stacks. More importantly, the outcome of such a duplicate notification is likely to be harmless since the signal itself does not carry any information other than the fact that we're in urgent mode. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dda42f0bd7a..557fe16cbfb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; /* The urg_mode check is necessary during a below snd_una win probe */ - if (unlikely(tcp_urg_mode(tp) && - between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { - th->urg_ptr = htons(tp->snd_up - tcb->seq); - th->urg = 1; + if (unlikely(tcp_urg_mode(tp))) { + if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); + th->urg = 1; + } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { + th->urg_ptr = 0xFFFF; + th->urg = 1; + } } tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); -- cgit v1.2.3-70-g09d2