From e0e9db178a5ba4dbb5f16f958f1affbdc63d2cc4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Select a working primary during sctp_connectx() When sctp_connectx() is used, we pick the first address as primary, even though it may not have worked. This results in excessive retransmits and poor performance. We should select the address that the association was established with. Reported-by: Thomas Dreibholz Signed-off-by: Vlad Yasevich --- net/sctp/sm_sideeffect.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/sctp/sm_sideeffect.c') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d491955..eda4fe783be 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1418,6 +1418,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; + /* Set the new transport as primary */ + sctp_assoc_set_primary(asoc, t); break; case SCTP_CMD_INIT_RESTART: -- cgit v1.2.3-70-g09d2 From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 4 ++++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 5 +++-- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +-- net/sctp/sysctl.c | 13 +++++++++++++ 6 files changed, 30 insertions(+), 4 deletions(-) (limited to 'net/sctp/sm_sideeffect.c') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 58f714a3b67..63908840eef 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -308,6 +308,10 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ +#define SCTP_DEFAULT_RWND_SHIFT 4 /* by default, update on 1/16 of + * rcvbuf, which is 1/8 of initial + * window + */ #define SCTP_DEFAULT_MAXSEGMENT 1500 /* MTU size, this is the limit * to which we will raise the P-MTU. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f8..90876b65777 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -231,6 +231,11 @@ extern struct sctp_globals { /* Flag to indicate whether computing and verifying checksum * is disabled. */ int checksum_disable; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_update_shift; } sctp_globals; #define sctp_rto_initial (sctp_globals.rto_initial) @@ -267,6 +272,7 @@ extern struct sctp_globals { #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) +#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) /* SCTP Socket type: UDP or TCP style. */ typedef enum { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8e755ebff3b..37e982510be 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1383,8 +1383,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 08ef203d36a..a3c8988758b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eda4fe783be..8ae67098e09 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120..ae03ded2bf1 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -284,6 +285,18 @@ static ctl_table sctp_table[] = { .extra1 = &zero, .extra2 = &addr_scope_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + { .ctl_name = 0 } }; -- cgit v1.2.3-70-g09d2 From 5fdd4baef6195a1f2960e901c8877e2105f832ca Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Sun, 29 Nov 2009 00:14:02 -0800 Subject: sctp: on T3_RTX retransmit all the in-flight chunks When retransmitting due to T3 timeout, retransmit all the in-flight chunks for the corresponding transport/path, including chunks sent less then 1 rto ago. This is the correct behaviour according to rfc4960 section 6.3.3 E3 and "Note: Any DATA chunks that were sent to the address for which the T3-rtx timer expired but did not fit in one MTU (rule E3 above) should be marked for retransmission and sent as soon as cwnd allows (normally, when a SACK arrives). ". This fixes problems when more then one path is present and the T3 retransmission of the first chunk that timeouts stops the T3 timer for the initial active path, leaving all the other in-flight chunks waiting forever or until a new chunk is transmitted on the same path and timeouts (and this will happen only if the cwnd allows sending new chunks, but since cwnd was dropped to MTU by the timeout => it will wait until the first heartbeat). Example: 10 packets in flight, sent at 0.1 s intervals on the primary path. The primary path is down and the first packet timeouts. The first packet is retransmitted on another path, the T3 timer for the primary path is stopped and cwnd is set to MTU. All the other 9 in-flight packets will not be retransmitted (unless more new packets are sent on the primary path which depend on cwnd allowing it, and even in this case the 9 packets will be retransmitted only after a new packet timeouts which even in the best case would be more then RTO). This commit reverts d0ce92910bc04e107b2f3f2048f07e94f570035d and also removes the now unused transport->last_rto, introduced in b6157d8e03e1e780660a328f7183bcbfa4a93a19. p.s The problem is not only when multiple paths are there. It can happen in a single homed environment. If the application stops sending data, it possible to have a hung association. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/outqueue.c | 10 ---------- net/sctp/sm_sideeffect.c | 1 - net/sctp/transport.c | 5 ++--- 4 files changed, 2 insertions(+), 15 deletions(-) (limited to 'net/sctp/sm_sideeffect.c') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f8..0a474568b00 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -893,7 +893,6 @@ struct sctp_transport { */ /* RTO : The current retransmission timeout value. */ unsigned long rto; - unsigned long last_rto; __u32 rtt; /* This is the most recent RTT. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521..23e5e97aa61 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -423,16 +423,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, if ((reason == SCTP_RTXR_FAST_RTX && (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { - /* If this chunk was sent less then 1 rto ago, do not - * retransmit this chunk, but give the peer time - * to acknowlege it. Do this only when - * retransmitting due to T3 timeout. - */ - if (reason == SCTP_RTXR_T3_RTX && - time_before(jiffies, chunk->sent_at + - transport->last_rto)) - continue; - /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d491955..efa516b47e8 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -480,7 +480,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { - transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32fa..37a1184d789 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -74,7 +74,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rtt = 0; peer->rttvar = 0; peer->srtt = 0; @@ -386,7 +386,6 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) tp->rto = tp->asoc->rto_max; tp->rtt = rtt; - tp->last_rto = tp->rto; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. @@ -602,7 +601,7 @@ void sctp_transport_reset(struct sctp_transport *t) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->ssthresh = asoc->peer.i.a_rwnd; - t->last_rto = t->rto = asoc->rto_initial; + t->rto = asoc->rto_initial; t->rtt = 0; t->srtt = 0; t->rttvar = 0; -- cgit v1.2.3-70-g09d2