diff options
author | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-03-21 13:05:45 -0600 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-03-21 13:05:45 -0600 |
commit | d04cdb64212eb5ae6a98026a97dda626e40e8e9a (patch) | |
tree | b6a7dbb21ccfceb915844e9a330b3d3dfcaf3c5b /net/dccp/ccids | |
parent | 2f8600dff2b140096a7df781884e918a16aa90e0 (diff) | |
parent | ec1248e70edc5cf7b485efcc7b41e44e10f422e5 (diff) |
Merge ../linux-2.6
Diffstat (limited to 'net/dccp/ccids')
-rw-r--r-- | net/dccp/ccids/Kconfig | 43 | ||||
-rw-r--r-- | net/dccp/ccids/Makefile | 4 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 779 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 85 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 112 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 5 |
6 files changed, 948 insertions, 80 deletions
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 7684d83946a..ca00191628f 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,9 +1,39 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)" depends on IP_DCCP && EXPERIMENTAL +config IP_DCCP_CCID2 + tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" + depends on IP_DCCP + def_tristate IP_DCCP + select IP_DCCP_ACKVEC + ---help--- + CCID 2, TCP-like Congestion Control, denotes Additive Increase, + Multiplicative Decrease (AIMD) congestion control with behavior + modelled directly on TCP, including congestion window, slow start, + timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum + bandwidth over the long term, consistent with the use of end-to-end + congestion control, but halves its congestion window in response to + each congestion event. This leads to the abrupt rate changes + typical of TCP. Applications should use CCID 2 if they prefer + maximum bandwidth utilization to steadiness of rate. This is often + the case for applications that are not playing their data directly + to the user. For example, a hypothetical application that + transferred files over DCCP, using application-level retransmissions + for lost packets, would prefer CCID 2 to CCID 3. On-line games may + also prefer CCID 2. + + CCID 2 is further described in: + http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid2-10.txt + + This text was extracted from: + http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt + + If in doubt, say M. + config IP_DCCP_CCID3 - tristate "CCID3 (TFRC) (EXPERIMENTAL)" + tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" depends on IP_DCCP + def_tristate IP_DCCP ---help--- CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to @@ -15,10 +45,15 @@ config IP_DCCP_CCID3 suitable than CCID 2 for applications such streaming media where a relatively smooth sending rate is of importance. - CCID 3 is further described in [CCID 3 PROFILE]. The TFRC - congestion control algorithms were initially described in RFC 3448. + CCID 3 is further described in: + + http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid3-11.txt. + + The TFRC congestion control algorithms were initially described in + RFC 3448. - This text was extracted from draft-ietf-dccp-spec-11.txt. + This text was extracted from: + http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt If in doubt, say M. diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 956f79f5074..438f20bccff 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -2,4 +2,8 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o dccp_ccid3-y := ccid3.o +obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o + +dccp_ccid2-y := ccid2.o + obj-y += lib/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c new file mode 100644 index 00000000000..d4f9e2d3345 --- /dev/null +++ b/net/dccp/ccids/ccid2.c @@ -0,0 +1,779 @@ +/* + * net/dccp/ccids/ccid2.c + * + * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk> + * + * Changes to meet Linux coding standards, and DCCP infrastructure fixes. + * + * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This implementation should follow: draft-ietf-dccp-ccid2-10.txt + * + * BUGS: + * - sequence number wrapping + * - jiffies wrapping + */ + +#include <linux/config.h> +#include "../ccid.h" +#include "../dccp.h" +#include "ccid2.h" + +static int ccid2_debug; + +#undef CCID2_DEBUG +#ifdef CCID2_DEBUG +#define ccid2_pr_debug(format, a...) \ + do { if (ccid2_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ + } while (0) +#else +#define ccid2_pr_debug(format, a...) +#endif + +static const int ccid2_seq_len = 128; + +#ifdef CCID2_DEBUG +static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) +{ + int len = 0; + int pipe = 0; + struct ccid2_seq *seqp = hctx->ccid2hctx_seqh; + + /* there is data in the chain */ + if (seqp != hctx->ccid2hctx_seqt) { + seqp = seqp->ccid2s_prev; + len++; + if (!seqp->ccid2s_acked) + pipe++; + + while (seqp != hctx->ccid2hctx_seqt) { + struct ccid2_seq *prev = seqp->ccid2s_prev; + + len++; + if (!prev->ccid2s_acked) + pipe++; + + /* packets are sent sequentially */ + BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); + BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); + BUG_ON(len > ccid2_seq_len); + + seqp = prev; + } + } + + BUG_ON(pipe != hctx->ccid2hctx_pipe); + ccid2_pr_debug("len of chain=%d\n", len); + + do { + seqp = seqp->ccid2s_prev; + len++; + BUG_ON(len > ccid2_seq_len); + } while (seqp != hctx->ccid2hctx_seqh); + + BUG_ON(len != ccid2_seq_len); + ccid2_pr_debug("total len=%d\n", len); +} +#else +#define ccid2_hc_tx_check_sanity(hctx) do {} while (0) +#endif + +static int ccid2_hc_tx_send_packet(struct sock *sk, + struct sk_buff *skb, int len) +{ + struct ccid2_hc_tx_sock *hctx; + + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case 0: /* XXX data packets from userland come through like this */ + case DCCP_PKT_DATA: + case DCCP_PKT_DATAACK: + break; + /* No congestion control on other packets */ + default: + return 0; + } + + hctx = ccid2_hc_tx_sk(sk); + + ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, + hctx->ccid2hctx_cwnd); + + if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) { + /* OK we can send... make sure previous packet was sent off */ + if (!hctx->ccid2hctx_sendwait) { + hctx->ccid2hctx_sendwait = 1; + return 0; + } + } + + return 100; /* XXX */ +} + +static void ccid2_change_l_ack_ratio(struct sock *sk, int val) +{ + struct dccp_sock *dp = dccp_sk(sk); + /* + * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio + * should be 1... it shouldn't be allowed to become 2. + * -sorbo. + */ + if (val != 2) { + const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + int max = hctx->ccid2hctx_cwnd / 2; + + /* round up */ + if (hctx->ccid2hctx_cwnd & 1) + max++; + + if (val > max) + val = max; + } + + ccid2_pr_debug("changing local ack ratio to %d\n", val); + WARN_ON(val <= 0); + dp->dccps_l_ack_ratio = val; +} + +static void ccid2_change_cwnd(struct sock *sk, int val) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + if (val == 0) + val = 1; + + /* XXX do we need to change ack ratio? */ + ccid2_pr_debug("change cwnd to %d\n", val); + + BUG_ON(val < 1); + hctx->ccid2hctx_cwnd = val; +} + +static void ccid2_start_rto_timer(struct sock *sk); + +static void ccid2_hc_tx_rto_expire(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + long s; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, + jiffies + HZ / 5); + goto out; + } + + ccid2_pr_debug("RTO_EXPIRE\n"); + + ccid2_hc_tx_check_sanity(hctx); + + /* back-off timer */ + hctx->ccid2hctx_rto <<= 1; + + s = hctx->ccid2hctx_rto / HZ; + if (s > 60) + hctx->ccid2hctx_rto = 60 * HZ; + + ccid2_start_rto_timer(sk); + + /* adjust pipe, cwnd etc */ + hctx->ccid2hctx_pipe = 0; + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; + ccid2_change_cwnd(sk, 1); + + /* clear state about stuff we sent */ + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; + hctx->ccid2hctx_ssacks = 0; + hctx->ccid2hctx_acks = 0; + hctx->ccid2hctx_sent = 0; + + /* clear ack ratio state. */ + hctx->ccid2hctx_arsent = 0; + hctx->ccid2hctx_ackloss = 0; + hctx->ccid2hctx_rpseq = 0; + hctx->ccid2hctx_rpdupack = -1; + ccid2_change_l_ack_ratio(sk, 1); + ccid2_hc_tx_check_sanity(hctx); +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +static void ccid2_start_rto_timer(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto); + + BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer)); + sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, + jiffies + hctx->ccid2hctx_rto); +} + +static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + u64 seq; + + ccid2_hc_tx_check_sanity(hctx); + + BUG_ON(!hctx->ccid2hctx_sendwait); + hctx->ccid2hctx_sendwait = 0; + hctx->ccid2hctx_pipe++; + BUG_ON(hctx->ccid2hctx_pipe < 0); + + /* There is an issue. What if another packet is sent between + * packet_send() and packet_sent(). Then the sequence number would be + * wrong. + * -sorbo. + */ + seq = dp->dccps_gss; + + hctx->ccid2hctx_seqh->ccid2s_seq = seq; + hctx->ccid2hctx_seqh->ccid2s_acked = 0; + hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next; + + ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, + hctx->ccid2hctx_pipe); + + if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { + /* XXX allocate more space */ + WARN_ON(1); + } + + hctx->ccid2hctx_sent++; + + /* Ack Ratio. Need to maintain a concept of how many windows we sent */ + hctx->ccid2hctx_arsent++; + /* We had an ack loss in this window... */ + if (hctx->ccid2hctx_ackloss) { + if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) { + hctx->ccid2hctx_arsent = 0; + hctx->ccid2hctx_ackloss = 0; + } + } else { + /* No acks lost up to now... */ + /* decrease ack ratio if enough packets were sent */ + if (dp->dccps_l_ack_ratio > 1) { + /* XXX don't calculate denominator each time */ + int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - + dp->dccps_l_ack_ratio; + + denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom; + + if (hctx->ccid2hctx_arsent >= denom) { + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); + hctx->ccid2hctx_arsent = 0; + } + } else { + /* we can't increase ack ratio further [1] */ + hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ + } + } + + /* setup RTO timer */ + if (!timer_pending(&hctx->ccid2hctx_rtotimer)) + ccid2_start_rto_timer(sk); + +#ifdef CCID2_DEBUG + ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); + ccid2_pr_debug("Sent: seq=%llu\n", seq); + do { + struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; + + while (seqp != hctx->ccid2hctx_seqh) { + ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", + seqp->ccid2s_seq, seqp->ccid2s_acked, + seqp->ccid2s_sent); + seqp = seqp->ccid2s_next; + } + } while (0); + ccid2_pr_debug("=========\n"); + ccid2_hc_tx_check_sanity(hctx); +#endif +} + +/* XXX Lame code duplication! + * returns -1 if none was found. + * else returns the next offset to use in the function call. + */ +static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, + unsigned char **vec, unsigned char *veclen) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); + unsigned char *opt_ptr; + const unsigned char *opt_end = (unsigned char *)dh + + (dh->dccph_doff * 4); + unsigned char opt, len; + unsigned char *value; + + BUG_ON(offset < 0); + options += offset; + opt_ptr = options; + if (opt_ptr >= opt_end) + return -1; + + while (opt_ptr != opt_end) { + opt = *opt_ptr++; + len = 0; + value = NULL; + + /* Check if this isn't a single byte option */ + if (opt > DCCPO_MAX_RESERVED) { + if (opt_ptr == opt_end) + goto out_invalid_option; + + len = *opt_ptr++; + if (len < 3) + goto out_invalid_option; + /* + * Remove the type and len fields, leaving + * just the value size + */ + len -= 2; + value = opt_ptr; + opt_ptr += len; + + if (opt_ptr > opt_end) + goto out_invalid_option; + } + + switch (opt) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + *vec = value; + *veclen = len; + return offset + (opt_ptr - options); + } + } + + return -1; + +out_invalid_option: + BUG_ON(1); /* should never happen... options were previously parsed ! */ + return -1; +} + +static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer); + ccid2_pr_debug("deleted RTO timer\n"); +} + +static inline void ccid2_new_ack(struct sock *sk, + struct ccid2_seq *seqp, + unsigned int *maxincr) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + /* slow start */ + if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { + hctx->ccid2hctx_acks = 0; + + /* We can increase cwnd at most maxincr [ack_ratio/2] */ + if (*maxincr) { + /* increase every 2 acks */ + hctx->ccid2hctx_ssacks++; + if (hctx->ccid2hctx_ssacks == 2) { + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + hctx->ccid2hctx_ssacks = 0; + *maxincr = *maxincr - 1; + } + } else { + /* increased cwnd enough for this single ack */ + hctx->ccid2hctx_ssacks = 0; + } + } else { + hctx->ccid2hctx_ssacks = 0; + hctx->ccid2hctx_acks++; + + if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + hctx->ccid2hctx_acks = 0; + } + } + + /* update RTO */ + if (hctx->ccid2hctx_srtt == -1 || + (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { + unsigned long r = jiffies - seqp->ccid2s_sent; + int s; + + /* first measurement */ + if (hctx->ccid2hctx_srtt == -1) { + ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", + r, jiffies, seqp->ccid2s_seq); + hctx->ccid2hctx_srtt = r; + hctx->ccid2hctx_rttvar = r >> 1; + } else { + /* RTTVAR */ + long tmp = hctx->ccid2hctx_srtt - r; + if (tmp < 0) + tmp *= -1; + + tmp >>= 2; + hctx->ccid2hctx_rttvar *= 3; + hctx->ccid2hctx_rttvar >>= 2; + hctx->ccid2hctx_rttvar += tmp; + + /* SRTT */ + hctx->ccid2hctx_srtt *= 7; + hctx->ccid2hctx_srtt >>= 3; + tmp = r >> 3; + hctx->ccid2hctx_srtt += tmp; + } + s = hctx->ccid2hctx_rttvar << 2; + /* clock granularity is 1 when based on jiffies */ + if (!s) + s = 1; + hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s; + + /* must be at least a second */ + s = hctx->ccid2hctx_rto / HZ; + /* DCCP doesn't require this [but I like it cuz my code sux] */ +#if 1 + if (s < 1) + hctx->ccid2hctx_rto = HZ; +#endif + /* max 60 seconds */ + if (s > 60) + hctx->ccid2hctx_rto = HZ * 60; + + hctx->ccid2hctx_lastrtt = jiffies; + + ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", + hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, + hctx->ccid2hctx_rto, HZ, r); + hctx->ccid2hctx_sent = 0; + } + + /* we got a new ack, so re-start RTO timer */ + ccid2_hc_tx_kill_rto_timer(sk); + ccid2_start_rto_timer(sk); +} + +static void ccid2_hc_tx_dec_pipe(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + hctx->ccid2hctx_pipe--; + BUG_ON(hctx->ccid2hctx_pipe < 0); + + if (hctx->ccid2hctx_pipe == 0) + ccid2_hc_tx_kill_rto_timer(sk); +} + +static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + u64 ackno, seqno; + struct ccid2_seq *seqp; + unsigned char *vector; + unsigned char veclen; + int offset = 0; + int done = 0; + int loss = 0; + unsigned int maxincr = 0; + + ccid2_hc_tx_check_sanity(hctx); + /* check reverse path congestion */ + seqno = DCCP_SKB_CB(skb)->dccpd_seq; + + /* XXX this whole "algorithm" is broken. Need to fix it to keep track + * of the seqnos of the dupacks so that rpseq and rpdupack are correct + * -sorbo. + */ + /* need to bootstrap */ + if (hctx->ccid2hctx_rpdupack == -1) { + hctx->ccid2hctx_rpdupack = 0; + hctx->ccid2hctx_rpseq = seqno; + } else { + /* check if packet is consecutive */ + if ((hctx->ccid2hctx_rpseq + 1) == seqno) + hctx->ccid2hctx_rpseq++; + /* it's a later packet */ + else if (after48(seqno, hctx->ccid2hctx_rpseq)) { + hctx->ccid2hctx_rpdupack++; + + /* check if we got enough dupacks */ + if (hctx->ccid2hctx_rpdupack >= + hctx->ccid2hctx_numdupack) { + hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ + hctx->ccid2hctx_rpseq = 0; + + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1); + } + } + } + + /* check forward path congestion */ + /* still didn't send out new data packets */ + if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) + return; + + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case DCCP_PKT_ACK: + case DCCP_PKT_DATAACK: + break; + default: + return; + } + + ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + + /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for + * this single ack. I round up. + * -sorbo. + */ + maxincr = dp->dccps_l_ack_ratio >> 1; + maxincr++; + + /* go through all ack vectors */ + while ((offset = ccid2_ackvector(sk, skb, offset, + &vector, &veclen)) != -1) { + /* go through this ack vector */ + while (veclen--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno, + ackno_end_rl); + /* if the seqno we are analyzing is larger than the + * current ackno, then move towards the tail of our + * seqnos. + */ + while (after48(seqp->ccid2s_seq, ackno)) { + if (seqp == hctx->ccid2hctx_seqt) { + done = 1; + break; + } + seqp = seqp->ccid2s_prev; + } + if (done) + break; + + /* check all seqnos in the range of the vector + * run length + */ + while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; + + /* new packet received or marked */ + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && + !seqp->ccid2s_acked) { + if (state == + DCCP_ACKVEC_STATE_ECN_MARKED) { + loss = 1; + } else + ccid2_new_ack(sk, seqp, + &maxincr); + + seqp->ccid2s_acked = 1; + ccid2_pr_debug("Got ack for %llu\n", + seqp->ccid2s_seq); + ccid2_hc_tx_dec_pipe(sk); + } + if (seqp == hctx->ccid2hctx_seqt) { + done = 1; + break; + } + seqp = seqp->ccid2s_next; + } + if (done) + break; + + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + vector++; + } + if (done) + break; + } + + /* The state about what is acked should be correct now + * Check for NUMDUPACK + */ + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + done = 0; + while (1) { + if (seqp->ccid2s_acked) { + done++; + if (done == hctx->ccid2hctx_numdupack) + break; + } + if (seqp == hctx->ccid2hctx_seqt) + break; + seqp = seqp->ccid2s_prev; + } + + /* If there are at least 3 acknowledgements, anything unacknowledged + * below the last sequence number is considered lost + */ + if (done == hctx->ccid2hctx_numdupack) { + struct ccid2_seq *last_acked = seqp; + + /* check for lost packets */ + while (1) { + if (!seqp->ccid2s_acked) { + loss = 1; + ccid2_hc_tx_dec_pipe(sk); + } + if (seqp == hctx->ccid2hctx_seqt) + break; + seqp = seqp->ccid2s_prev; + } + + hctx->ccid2hctx_seqt = last_acked; + } + + /* trim acked packets in tail */ + while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) { + if (!hctx->ccid2hctx_seqt->ccid2s_acked) + break; + + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; + } + + if (loss) { + /* XXX do bit shifts guarantee a 0 as the new bit? */ + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1); + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; + } + + ccid2_hc_tx_check_sanity(hctx); +} + +static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); + int seqcount = ccid2_seq_len; + int i; + + /* XXX init variables with proper values */ + hctx->ccid2hctx_cwnd = 1; + hctx->ccid2hctx_ssthresh = 10; + hctx->ccid2hctx_numdupack = 3; + + /* XXX init ~ to window size... */ + hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * + seqcount, gfp_any()); + if (hctx->ccid2hctx_seqbuf == NULL) + return -ENOMEM; + + for (i = 0; i < (seqcount - 1); i++) { + hctx->ccid2hctx_seqbuf[i].ccid2s_next = + &hctx->ccid2hctx_seqbuf[i + 1]; + hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev = + &hctx->ccid2hctx_seqbuf[i]; + } + hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next = + hctx->ccid2hctx_seqbuf; + hctx->ccid2hctx_seqbuf->ccid2s_prev = + &hctx->ccid2hctx_seqbuf[seqcount - 1]; + + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf; + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; + hctx->ccid2hctx_sent = 0; + hctx->ccid2hctx_rto = 3 * HZ; + hctx->ccid2hctx_srtt = -1; + hctx->ccid2hctx_rttvar = -1; + hctx->ccid2hctx_lastrtt = 0; + hctx->ccid2hctx_rpdupack = -1; + + hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; + hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; + init_timer(&hctx->ccid2hctx_rtotimer); + + ccid2_hc_tx_check_sanity(hctx); + return 0; +} + +static void ccid2_hc_tx_exit(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + ccid2_hc_tx_kill_rto_timer(sk); + kfree(hctx->ccid2hctx_seqbuf); + hctx->ccid2hctx_seqbuf = NULL; +} + +static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk); + + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case DCCP_PKT_DATA: + case DCCP_PKT_DATAACK: + hcrx->ccid2hcrx_data++; + if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) { + dccp_send_ack(sk); + hcrx->ccid2hcrx_data = 0; + } + break; + } +} + +static struct ccid_operations ccid2 = { + .ccid_id = 2, + .ccid_name = "ccid2", + .ccid_owner = THIS_MODULE, + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, +}; + +module_param(ccid2_debug, int, 0444); +MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); + +static __init int ccid2_module_init(void) +{ + return ccid_register(&ccid2); +} +module_init(ccid2_module_init); + +static __exit void ccid2_module_exit(void) +{ + ccid_unregister(&ccid2); +} +module_exit(ccid2_module_exit); + +MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>"); +MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("net-dccp-ccid-2"); diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h new file mode 100644 index 00000000000..451a87464fa --- /dev/null +++ b/net/dccp/ccids/ccid2.h @@ -0,0 +1,85 @@ +/* + * net/dccp/ccids/ccid2.h + * + * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _DCCP_CCID2_H_ +#define _DCCP_CCID2_H_ + +#include <linux/dccp.h> +#include <linux/timer.h> +#include <linux/types.h> +#include "../ccid.h" + +struct sock; + +struct ccid2_seq { + u64 ccid2s_seq; + unsigned long ccid2s_sent; + int ccid2s_acked; + struct ccid2_seq *ccid2s_prev; + struct ccid2_seq *ccid2s_next; +}; + +/** struct ccid2_hc_tx_sock - CCID2 TX half connection + * + * @ccid2hctx_ssacks - ACKs recv in slow start + * @ccid2hctx_acks - ACKS recv in AI phase + * @ccid2hctx_sent - packets sent in this window + * @ccid2hctx_lastrtt -time RTT was last measured + * @ccid2hctx_arsent - packets sent [ack ratio] + * @ccid2hctx_ackloss - ack was lost in this win + * @ccid2hctx_rpseq - last consecutive seqno + * @ccid2hctx_rpdupack - dupacks since rpseq +*/ +struct ccid2_hc_tx_sock { + int ccid2hctx_cwnd; + int ccid2hctx_ssacks; + int ccid2hctx_acks; + int ccid2hctx_ssthresh; + int ccid2hctx_pipe; + int ccid2hctx_numdupack; + struct ccid2_seq *ccid2hctx_seqbuf; + struct ccid2_seq *ccid2hctx_seqh; + struct ccid2_seq *ccid2hctx_seqt; + long ccid2hctx_rto; + long ccid2hctx_srtt; + long ccid2hctx_rttvar; + int ccid2hctx_sent; + unsigned long ccid2hctx_lastrtt; + struct timer_list ccid2hctx_rtotimer; + unsigned long ccid2hctx_arsent; + int ccid2hctx_ackloss; + u64 ccid2hctx_rpseq; + int ccid2hctx_rpdupack; + int ccid2hctx_sendwait; +}; + +struct ccid2_hc_rx_sock { + int ccid2hcrx_data; +}; + +static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) +{ + return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); +} + +static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk) +{ + return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); +} +#endif /* _DCCP_CCID2_H_ */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 35d1d347541..b4a51d0355a 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -46,7 +46,7 @@ * Reason for maths here is to avoid 32 bit overflow when a is big. * With this we get close to the limit. */ -static inline u32 usecs_div(const u32 a, const u32 b) +static u32 usecs_div(const u32 a, const u32 b) { const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : @@ -76,15 +76,6 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; static struct dccp_li_hist *ccid3_li_hist; -static int ccid3_init(struct sock *sk) -{ - return 0; -} - -static void ccid3_exit(struct sock *sk) -{ -} - /* TFRC sender states */ enum ccid3_hc_tx_states { TFRC_SSTATE_NO_SENT = 1, @@ -107,8 +98,8 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) } #endif -static inline void ccid3_hc_tx_set_state(struct sock *sk, - enum ccid3_hc_tx_states state) +static void ccid3_hc_tx_set_state(struct sock *sk, + enum ccid3_hc_tx_states state) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; @@ -316,8 +307,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; - hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); hctx->ccid3hctx_last_win_count = 0; @@ -585,16 +574,15 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } } -static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) +static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) { const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); BUG_ON(hctx == NULL); - if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) - return; - - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN) + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + return 0; } static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, @@ -626,7 +614,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); + opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value); ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); @@ -647,7 +635,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, __FUNCTION__, dccp_role(sk), sk); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); + opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value); ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); @@ -658,17 +646,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, return rc; } -static int ccid3_hc_tx_init(struct sock *sk) +static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx; - - dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); - if (dp->dccps_hc_tx_ccid_private == NULL) - return -ENOMEM; - - hctx = ccid3_hc_tx_sk(sk); - memset(hctx, 0, sizeof(*hctx)); + struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) @@ -681,6 +662,9 @@ static int ccid3_hc_tx_init(struct sock *sk) hctx->ccid3hctx_t_rto = USEC_PER_SEC; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); + + hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; init_timer(&hctx->ccid3hctx_no_feedback_timer); return 0; @@ -688,7 +672,6 @@ static int ccid3_hc_tx_init(struct sock *sk) static void ccid3_hc_tx_exit(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); BUG_ON(hctx == NULL); @@ -698,9 +681,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) /* Empty packet history */ dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); - - kfree(dp->dccps_hc_tx_ccid_private); - dp->dccps_hc_tx_ccid_private = NULL; } /* @@ -727,8 +707,8 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) } #endif -static inline void ccid3_hc_rx_set_state(struct sock *sk, - enum ccid3_hc_rx_states state) +static void ccid3_hc_rx_set_state(struct sock *sk, + enum ccid3_hc_rx_states state) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; @@ -793,31 +773,35 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) dccp_send_ack(sk); } -static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) +static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u32 x_recv, pinv; + __be32 x_recv, pinv; BUG_ON(hcrx == NULL); if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) - return; + return 0; DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; if (dccp_packet_without_ack(skb)) - return; - - if (hcrx->ccid3hcrx_elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, - hcrx->ccid3hcrx_elapsed_time); - dccp_insert_option_timestamp(sk, skb); + return 0; + x_recv = htonl(hcrx->ccid3hcrx_x_recv); pinv = htonl(hcrx->ccid3hcrx_pinv); - dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, - &pinv, sizeof(pinv)); - dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, - &x_recv, sizeof(x_recv)); + + if ((hcrx->ccid3hcrx_elapsed_time != 0 && + dccp_insert_option_elapsed_time(sk, skb, + hcrx->ccid3hcrx_elapsed_time)) || + dccp_insert_option_timestamp(sk, skb) || + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, + &pinv, sizeof(pinv)) || + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, + &x_recv, sizeof(x_recv))) + return -1; + + return 0; } /* calculate first loss interval @@ -1047,20 +1031,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } } -static int ccid3_hc_rx_init(struct sock *sk) +static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx; + struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); - if (dp->dccps_hc_rx_ccid_private == NULL) - return -ENOMEM; - - hcrx = ccid3_hc_rx_sk(sk); - memset(hcrx, 0, sizeof(*hcrx)); - if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) hcrx->ccid3hcrx_s = dp->dccps_packet_size; @@ -1079,7 +1056,6 @@ static int ccid3_hc_rx_init(struct sock *sk) static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); BUG_ON(hcrx == NULL); @@ -1090,9 +1066,6 @@ static void ccid3_hc_rx_exit(struct sock *sk) /* Empty loss interval history */ dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); - - kfree(dp->dccps_hc_rx_ccid_private); - dp->dccps_hc_rx_ccid_private = NULL; } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) @@ -1178,12 +1151,11 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, return 0; } -static struct ccid ccid3 = { +static struct ccid_operations ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", .ccid_owner = THIS_MODULE, - .ccid_init = ccid3_init, - .ccid_exit = ccid3_exit, + .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), .ccid_hc_tx_init = ccid3_hc_tx_init, .ccid_hc_tx_exit = ccid3_hc_tx_exit, .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, @@ -1191,6 +1163,7 @@ static struct ccid ccid3 = { .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, + .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), .ccid_hc_rx_init = ccid3_hc_rx_init, .ccid_hc_rx_exit = ccid3_hc_rx_exit, .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, @@ -1241,15 +1214,6 @@ module_init(ccid3_module_init); static __exit void ccid3_module_exit(void) { -#ifdef CONFIG_IP_DCCP_UNLOAD_HACK - /* - * Hack to use while developing, so that we get rid of the control - * sock, that is what keeps a refcount on dccp.ko -acme - */ - extern void dccp_ctl_sock_exit(void); - - dccp_ctl_sock_exit(); -#endif ccid_unregister(&ccid3); if (ccid3_tx_hist != NULL) { diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 0bde4583d09..f18b96d4e5a 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -41,6 +41,7 @@ #include <linux/time.h> #include <linux/types.h> #include <linux/tfrc.h> +#include "../ccid.h" #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -135,12 +136,12 @@ struct ccid3_hc_rx_sock { static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) { - return dccp_sk(sk)->dccps_hc_tx_ccid_private; + return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); } static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) { - return dccp_sk(sk)->dccps_hc_rx_ccid_private; + return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); } #endif /* _DCCP_CCID3_H_ */ |