From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net/sctp/structs.h') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f8..90876b65777 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -231,6 +231,11 @@ extern struct sctp_globals { /* Flag to indicate whether computing and verifying checksum * is disabled. */ int checksum_disable; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_update_shift; } sctp_globals; #define sctp_rto_initial (sctp_globals.rto_initial) @@ -267,6 +272,7 @@ extern struct sctp_globals { #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) +#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) /* SCTP Socket type: UDP or TCP style. */ typedef enum { -- cgit v1.2.3-70-g09d2 From 245cba7e55929dc2b10b7d915bfba0168eeeed17 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: Remove useless last_time_used variable The transport last_time_used variable is rather useless. It was only used when determining if CWND needs to be updated due to idle transport. However, idle transport detection was based on a Heartbeat timer and last_time_used was not incremented when sending Heartbeats. As a result the check for cwnd reduction was always true. We can get rid of the variable and just base our cwnd manipulation on the HB timer (like the code comment sais). We also have to call into the cwnd manipulation function regardless of whether HBs are enabled or not. That way we will detect idle transports if the user has disabled Heartbeats. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 6 ------ net/sctp/output.c | 2 -- net/sctp/sm_statefuns.c | 5 +++-- net/sctp/transport.c | 7 ++----- 4 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/net/sctp/structs.h') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 90876b65777..ac794a6823e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,12 +950,6 @@ struct sctp_transport { /* Source address. */ union sctp_addr saddr; - /* When was the last time(in jiffies) that a data packet was sent on - * this transport? This is used to adjust the cwnd when the transport - * becomes inactive. - */ - unsigned long last_time_used; - /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 5cbda8f1ddf..9e8e0ea844b 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -557,8 +557,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct timer_list *timer; unsigned long timeout; - tp->last_time_used = jiffies; - /* Restart the AUTOCLOSE timer when sending data. */ if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 16a603527df..1ef9de9bbae 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -996,14 +996,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; + /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, - SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32fa..2df29cbdaf5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->fast_recovery = 0; peer->last_time_heard = jiffies; - peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; peer->init_sent_count = 0; @@ -565,10 +564,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if (time_after(jiffies, transport->last_time_used + - transport->rto)) - transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + transport->cwnd = max(transport->cwnd/2, + 4*transport->asoc->pathmtu); break; } -- cgit v1.2.3-70-g09d2 From 46d5a808558181e03a4760d2188cc9879445738a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Update max.burst implementation Current implementation of max.burst ends up limiting new data during cwnd decay period. The decay is happening becuase the connection is idle and we are allowed to fill the congestion window. The point of max.burst is to limit micro-bursts in response to large acks. This still happens, as max.burst is still applied to each transmit opportunity. It will also apply if a very large send is made (greater then allowed by burst). Tested-by: Florian Niederbacher Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 4 ++++ net/sctp/associola.c | 1 + net/sctp/output.c | 23 ----------------------- net/sctp/outqueue.c | 15 +++++++++++++++ net/sctp/transport.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 23 deletions(-) (limited to 'include/net/sctp/structs.h') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ac794a6823e..bc3f8d879c5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -942,6 +942,8 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; + __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ + /* TSN marking the fast recovery exit point */ __u32 fast_recovery_exit; @@ -1070,6 +1072,8 @@ void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_burst_limited(struct sctp_transport *); +void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); void sctp_transport_update_pmtu(struct sctp_transport *, u32); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37e982510be..880dae2ca87 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -738,6 +738,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; + peer->burst_limited = 0; /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; diff --git a/net/sctp/output.c b/net/sctp/output.c index 9e8e0ea844b..b210d2077e2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -615,7 +615,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; - __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; @@ -648,28 +647,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, } } - /* sctpimpguide-05 2.14.2 - * D) When the time comes for the sender to - * transmit new DATA chunks, the protocol parameter Max.Burst MUST - * first be applied to limit how many new DATA chunks may be sent. - * The limit is applied by adjusting cwnd as follows: - * if ((flightsize + Max.Burst * MTU) < cwnd) - * cwnd = flightsize + Max.Burst * MTU - */ - max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = flight_size + max_burst_bytes; - SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " - "transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, " - "pba: %d\n", - __func__, transport, - transport->cwnd, - transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - } - /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5732661c87d..2f2377369e2 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -931,6 +931,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) goto sctp_flush_out; } + /* Apply Max.Burst limitation to the current transport in + * case it will be used for new data. We are going to + * rest it before we return, but we want to apply the limit + * to the currently queued data. + */ + if (transport) + sctp_transport_burst_limited(transport); + /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid @@ -976,6 +984,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) packet = &transport->packet; sctp_packet_config(packet, vtag, asoc->peer.ecn_capable); + /* We've switched transports, so apply the + * Burst limit to the new transport. + */ + sctp_transport_burst_limited(transport); } SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", @@ -1070,6 +1082,9 @@ sctp_flush_out: packet = &t->packet; if (!sctp_packet_empty(packet)) error = sctp_packet_transmit(packet); + + /* Clear the burst limited state, if any */ + sctp_transport_burst_reset(t); } return error; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2df29cbdaf5..9a6cb22d129 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -576,6 +576,43 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->cwnd, transport->ssthresh); } +/* Apply Max.Burst limit to the congestion window: + * sctpimpguide-05 2.14.2 + * D) When the time comes for the sender to + * transmit new DATA chunks, the protocol parameter Max.Burst MUST + * first be applied to limit how many new DATA chunks may be sent. + * The limit is applied by adjusting cwnd as follows: + * if ((flightsize+ Max.Burst * MTU) < cwnd) + * cwnd = flightsize + Max.Burst * MTU + */ + +void sctp_transport_burst_limited(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + u32 old_cwnd = t->cwnd; + u32 max_burst_bytes; + + if (t->burst_limited) + return; + + max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); + if (max_burst_bytes < old_cwnd) { + t->cwnd = max_burst_bytes; + t->burst_limited = old_cwnd; + } +} + +/* Restore the old cwnd congestion window, after the burst had it's + * desired effect. + */ +void sctp_transport_burst_reset(struct sctp_transport *t) +{ + if (t->burst_limited) { + t->cwnd = t->burst_limited; + t->burst_limited = 0; + } +} + /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *t) { @@ -598,6 +635,7 @@ void sctp_transport_reset(struct sctp_transport *t) * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->last_rto = t->rto = asoc->rto_initial; t->rtt = 0; -- cgit v1.2.3-70-g09d2 From 5fdd4baef6195a1f2960e901c8877e2105f832ca Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Sun, 29 Nov 2009 00:14:02 -0800 Subject: sctp: on T3_RTX retransmit all the in-flight chunks When retransmitting due to T3 timeout, retransmit all the in-flight chunks for the corresponding transport/path, including chunks sent less then 1 rto ago. This is the correct behaviour according to rfc4960 section 6.3.3 E3 and "Note: Any DATA chunks that were sent to the address for which the T3-rtx timer expired but did not fit in one MTU (rule E3 above) should be marked for retransmission and sent as soon as cwnd allows (normally, when a SACK arrives). ". This fixes problems when more then one path is present and the T3 retransmission of the first chunk that timeouts stops the T3 timer for the initial active path, leaving all the other in-flight chunks waiting forever or until a new chunk is transmitted on the same path and timeouts (and this will happen only if the cwnd allows sending new chunks, but since cwnd was dropped to MTU by the timeout => it will wait until the first heartbeat). Example: 10 packets in flight, sent at 0.1 s intervals on the primary path. The primary path is down and the first packet timeouts. The first packet is retransmitted on another path, the T3 timer for the primary path is stopped and cwnd is set to MTU. All the other 9 in-flight packets will not be retransmitted (unless more new packets are sent on the primary path which depend on cwnd allowing it, and even in this case the 9 packets will be retransmitted only after a new packet timeouts which even in the best case would be more then RTO). This commit reverts d0ce92910bc04e107b2f3f2048f07e94f570035d and also removes the now unused transport->last_rto, introduced in b6157d8e03e1e780660a328f7183bcbfa4a93a19. p.s The problem is not only when multiple paths are there. It can happen in a single homed environment. If the application stops sending data, it possible to have a hung association. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/outqueue.c | 10 ---------- net/sctp/sm_sideeffect.c | 1 - net/sctp/transport.c | 5 ++--- 4 files changed, 2 insertions(+), 15 deletions(-) (limited to 'include/net/sctp/structs.h') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f8..0a474568b00 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -893,7 +893,6 @@ struct sctp_transport { */ /* RTO : The current retransmission timeout value. */ unsigned long rto; - unsigned long last_rto; __u32 rtt; /* This is the most recent RTT. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521..23e5e97aa61 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -423,16 +423,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, if ((reason == SCTP_RTXR_FAST_RTX && (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { - /* If this chunk was sent less then 1 rto ago, do not - * retransmit this chunk, but give the peer time - * to acknowlege it. Do this only when - * retransmitting due to T3 timeout. - */ - if (reason == SCTP_RTXR_T3_RTX && - time_before(jiffies, chunk->sent_at + - transport->last_rto)) - continue; - /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d491955..efa516b47e8 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -480,7 +480,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { - transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32fa..37a1184d789 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -74,7 +74,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rtt = 0; peer->rttvar = 0; peer->srtt = 0; @@ -386,7 +386,6 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) tp->rto = tp->asoc->rto_max; tp->rtt = rtt; - tp->last_rto = tp->rto; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. @@ -602,7 +601,7 @@ void sctp_transport_reset(struct sctp_transport *t) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->ssthresh = asoc->peer.i.a_rwnd; - t->last_rto = t->rto = asoc->rto_initial; + t->rto = asoc->rto_initial; t->rtt = 0; t->srtt = 0; t->rttvar = 0; -- cgit v1.2.3-70-g09d2