summaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-11 19:40:14 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-11 19:40:14 -0700
commit038a5008b2f395c85e6e71d6ddf3c684e7c405b0 (patch)
tree4735eab577e97e5a22c3141e3f60071c8065585e /net/dccp
parentdd6d1844af33acb4edd0a40b1770d091a22c94be (diff)
parent266918303226cceac7eca38ced30f15f277bd89c (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (867 commits) [SKY2]: status polling loop (post merge) [NET]: Fix NAPI completion handling in some drivers. [TCP]: Limit processing lost_retrans loop to work-to-do cases [TCP]: Fix lost_retrans loop vs fastpath problems [TCP]: No need to re-count fackets_out/sacked_out at RTO [TCP]: Extract tcp_match_queue_to_sack from sacktag code [TCP]: Kill almost unused variable pcount from sacktag [TCP]: Fix mark_head_lost to ignore R-bit when trying to mark L [TCP]: Add bytes_acked (ABC) clearing to FRTO too [IPv6]: Update setsockopt(IPV6_MULTICAST_IF) to support RFC 3493, try2 [NETFILTER]: x_tables: add missing ip6t_modulename aliases [NETFILTER]: nf_conntrack_tcp: fix connection reopening [QETH]: fix qeth_main.c [NETLINK]: fib_frontend build fixes [IPv6]: Export userland ND options through netlink (RDNSS support) [9P]: build fix with !CONFIG_SYSCTL [NET]: Fix dev_put() and dev_hold() comments [NET]: make netlink user -> kernel interface synchronious [NET]: unify netlink kernel socket recognition [NET]: cleanup 3rd argument in netlink_sendskb ... Fix up conflicts manually in Documentation/feature-removal-schedule.txt and my new least favourite crap, the "mod_devicetable" support in the files include/linux/mod_devicetable.h and scripts/mod/file2alias.c. (The latter files seem to be explicitly _designed_ to get conflicts when different subsystems work with them - that have an absolutely horrid lack of subsystem separation!) Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/ackvec.c16
-rw-r--r--net/dccp/ackvec.h4
-rw-r--r--net/dccp/ccids/ccid2.c62
-rw-r--r--net/dccp/ccids/ccid2.h2
-rw-r--r--net/dccp/ccids/ccid3.c109
-rw-r--r--net/dccp/ccids/ccid3.h23
-rw-r--r--net/dccp/ccids/lib/loss_interval.c11
-rw-r--r--net/dccp/ccids/lib/loss_interval.h4
-rw-r--r--net/dccp/ccids/lib/packet_history.h9
-rw-r--r--net/dccp/dccp.h65
-rw-r--r--net/dccp/input.c120
-rw-r--r--net/dccp/ipv4.c51
-rw-r--r--net/dccp/ipv6.c46
-rw-r--r--net/dccp/minisocks.c11
-rw-r--r--net/dccp/options.c47
-rw-r--r--net/dccp/output.c151
-rw-r--r--net/dccp/probe.c7
-rw-r--r--net/dccp/proto.c12
-rw-r--r--net/dccp/sysctl.c10
-rw-r--r--net/dccp/timer.c21
20 files changed, 339 insertions, 442 deletions
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 7ac775f9a64..83378f379f7 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -69,21 +69,20 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
/* Figure out how many options do we need to represent the ackvec */
- const u16 nr_opts = (av->dccpav_vec_len +
- DCCP_MAX_ACKVEC_OPT_LEN - 1) /
- DCCP_MAX_ACKVEC_OPT_LEN;
+ const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len,
+ DCCP_MAX_ACKVEC_OPT_LEN);
u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
- struct timeval now;
u32 elapsed_time;
const unsigned char *tail, *from;
unsigned char *to;
struct dccp_ackvec_record *avr;
+ suseconds_t delta;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
return -1;
- dccp_timestamp(sk, &now);
- elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
+ delta = ktime_us_delta(ktime_get_real(), av->dccpav_time);
+ elapsed_time = delta / 10;
if (elapsed_time != 0 &&
dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
@@ -159,8 +158,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
av->dccpav_buf_ackno = UINT48_MAX + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
- av->dccpav_time.tv_sec = 0;
- av->dccpav_time.tv_usec = 0;
+ av->dccpav_time = ktime_set(0, 0);
av->dccpav_vec_len = 0;
INIT_LIST_HEAD(&av->dccpav_records);
}
@@ -321,7 +319,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
}
av->dccpav_buf_ackno = ackno;
- dccp_timestamp(sk, &av->dccpav_time);
+ av->dccpav_time = ktime_get_real();
out:
return 0;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 96504a3b16e..9ef0737043e 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -12,8 +12,8 @@
*/
#include <linux/compiler.h>
+#include <linux/ktime.h>
#include <linux/list.h>
-#include <linux/time.h>
#include <linux/types.h>
/* Read about the ECN nonce to see why it is 253 */
@@ -52,7 +52,7 @@
struct dccp_ackvec {
u64 dccpav_buf_ackno;
struct list_head dccpav_records;
- struct timeval dccpav_time;
+ ktime_t dccpav_time;
u16 dccpav_buf_head;
u16 dccpav_vec_len;
u8 dccpav_buf_nonce;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d29b88fe723..426008e3b7e 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -59,7 +59,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
pipe++;
/* packets are sent sequentially */
- BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
+ BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
+ prev->ccid2s_seq ) >= 0);
BUG_ON(time_before(seqp->ccid2s_sent,
prev->ccid2s_sent));
@@ -83,8 +84,7 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
#define ccid2_hc_tx_check_sanity(hctx)
#endif
-static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
- gfp_t gfp)
+static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
{
struct ccid2_seq *seqp;
int i;
@@ -95,16 +95,16 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
return -ENOMEM;
/* allocate buffer and initialize linked list */
- seqp = kmalloc(sizeof(*seqp) * num, gfp);
+ seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any());
if (seqp == NULL)
return -ENOMEM;
- for (i = 0; i < (num - 1); i++) {
+ for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
seqp[i].ccid2s_next = &seqp[i + 1];
seqp[i + 1].ccid2s_prev = &seqp[i];
}
- seqp[num - 1].ccid2s_next = seqp;
- seqp->ccid2s_prev = &seqp[num - 1];
+ seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
+ seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
/* This is the first allocation. Initiate the head and tail. */
if (hctx->ccid2hctx_seqbufc == 0)
@@ -114,8 +114,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
hctx->ccid2hctx_seqh->ccid2s_next = seqp;
seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
- hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1];
- seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt;
+ hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
+ seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
}
/* store the original pointer to the buffer so we can free it */
@@ -127,19 +127,7 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
- struct ccid2_hc_tx_sock *hctx;
-
- switch (DCCP_SKB_CB(skb)->dccpd_type) {
- case 0: /* XXX data packets from userland come through like this */
- case DCCP_PKT_DATA:
- case DCCP_PKT_DATAACK:
- break;
- /* No congestion control on other packets */
- default:
- return 0;
- }
-
- hctx = ccid2_hc_tx_sk(sk);
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
hctx->ccid2hctx_cwnd);
@@ -180,16 +168,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
dp->dccps_l_ack_ratio = val;
}
-static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
+static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val)
{
- if (val == 0)
- val = 1;
-
/* XXX do we need to change ack ratio? */
- ccid2_pr_debug("change cwnd to %d\n", val);
-
- BUG_ON(val < 1);
- hctx->ccid2hctx_cwnd = val;
+ hctx->ccid2hctx_cwnd = val? : 1;
+ ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd);
}
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
@@ -295,12 +278,11 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
next = hctx->ccid2hctx_seqh->ccid2s_next;
/* check if we need to alloc more space */
if (next == hctx->ccid2hctx_seqt) {
- int rc;
-
- ccid2_pr_debug("allocating more space in history\n");
- rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, gfp_any());
- BUG_ON(rc); /* XXX what do we do? */
-
+ if (ccid2_hc_tx_alloc_seq(hctx)) {
+ DCCP_CRIT("packet history - out of memory!");
+ /* FIXME: find a more graceful way to bail out */
+ return;
+ }
next = hctx->ccid2hctx_seqh->ccid2s_next;
BUG_ON(next == hctx->ccid2hctx_seqt);
}
@@ -581,8 +563,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hctx->ccid2hctx_rpseq = seqno;
} else {
/* check if packet is consecutive */
- if ((hctx->ccid2hctx_rpseq + 1) == seqno)
- hctx->ccid2hctx_rpseq++;
+ if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
+ hctx->ccid2hctx_rpseq = seqno;
/* it's a later packet */
else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
hctx->ccid2hctx_rpdupack++;
@@ -771,7 +753,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hctx->ccid2hctx_seqbufc = 0;
/* XXX init ~ to window size... */
- if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
+ if (ccid2_hc_tx_alloc_seq(hctx))
return -ENOMEM;
hctx->ccid2hctx_sent = 0;
@@ -835,7 +817,7 @@ static struct ccid_operations ccid2 = {
};
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-module_param(ccid2_debug, int, 0444);
+module_param(ccid2_debug, bool, 0444);
MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
#endif
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index ebd79499c85..d9daa534c9b 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -50,7 +50,7 @@ struct ccid2_seq {
* @ccid2hctx_rpdupack - dupacks since rpseq
*/
struct ccid2_hc_tx_sock {
- int ccid2hctx_cwnd;
+ u32 ccid2hctx_cwnd;
int ccid2hctx_ssacks;
int ccid2hctx_acks;
unsigned int ccid2hctx_ssthresh;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index e91c2b9dc27..25772c32617 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -113,27 +113,24 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
}
-/*
- * Update X by
- * If (p > 0)
- * X_calc = calcX(s, R, p);
- * X = max(min(X_calc, 2 * X_recv), s / t_mbi);
- * Else
- * If (now - tld >= R)
- * X = max(min(2 * X, 2 * X_recv), s / R);
- * tld = now;
+
+/**
+ * ccid3_hc_tx_update_x - Update allowed sending rate X
+ * @stamp: most recent time if available - can be left NULL.
+ * This function tracks draft rfc3448bis, check there for latest details.
*
* Note: X and X_recv are both stored in units of 64 * bytes/second, to support
* fine-grained resolution of sending rates. This requires scaling by 2^6
* throughout the code. Only X_calc is unscaled (in bytes/second).
*
*/
-static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
+static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
const __u64 old_x = hctx->ccid3hctx_x;
+ ktime_t now = stamp? *stamp : ktime_get_real();
/*
* Handle IDLE periods: do not reduce below RFC3390 initial sending rate
@@ -153,14 +150,14 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
(((__u64)hctx->ccid3hctx_s) << 6) /
TFRC_T_MBI);
- } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) -
- (suseconds_t)hctx->ccid3hctx_rtt >= 0) {
+ } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld)
+ - (s64)hctx->ccid3hctx_rtt >= 0) {
hctx->ccid3hctx_x =
max(min(2 * hctx->ccid3hctx_x, min_rate),
scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
hctx->ccid3hctx_rtt));
- hctx->ccid3hctx_t_ld = *now;
+ hctx->ccid3hctx_t_ld = now;
}
if (hctx->ccid3hctx_x != old_x) {
@@ -214,7 +211,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- struct timeval now;
unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
@@ -265,15 +261,12 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
max(hctx->ccid3hctx_x_recv / 2,
(((__u64)hctx->ccid3hctx_s) << 6) /
(2 * TFRC_T_MBI));
-
- if (hctx->ccid3hctx_p == 0)
- dccp_timestamp(sk, &now);
} else {
hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
hctx->ccid3hctx_x_recv <<= 4;
}
/* Now recalculate X [RFC 3448, 4.3, step (4)] */
- ccid3_hc_tx_update_x(sk, &now);
+ ccid3_hc_tx_update_x(sk, NULL);
/*
* Schedule no feedback timer to expire in
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -309,8 +302,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
ktime_t now = ktime_get_real();
s64 delay;
- BUG_ON(hctx == NULL);
-
/*
* This function is called only for Data and DataAck packets. Sending
* zero-sized Data(Ack)s is theoretically possible, but for congestion
@@ -341,7 +332,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
hctx->ccid3hctx_rtt = dp->dccps_syn_rtt;
hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
- hctx->ccid3hctx_t_ld = ktime_to_timeval(now);
+ hctx->ccid3hctx_t_ld = now;
} else {
/* Sender does not have RTT sample: X = MSS/second */
hctx->ccid3hctx_x = dp->dccps_mss_cache;
@@ -388,11 +379,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
unsigned int len)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- struct timeval now;
struct dccp_tx_hist_entry *packet;
- BUG_ON(hctx == NULL);
-
ccid3_hc_tx_update_s(hctx, len);
packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
@@ -402,8 +390,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
}
dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
- dccp_timestamp(sk, &now);
- packet->dccphtx_tstamp = now;
+ packet->dccphtx_tstamp = ktime_get_real();
packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
packet->dccphtx_sent = 1;
@@ -414,12 +401,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct ccid3_options_received *opt_recv;
struct dccp_tx_hist_entry *packet;
- struct timeval now;
+ ktime_t now;
unsigned long t_nfb;
u32 pinv, r_sample;
- BUG_ON(hctx == NULL);
-
/* we are only interested in ACKs */
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
@@ -452,13 +437,12 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
else /* can not exceed 100% */
hctx->ccid3hctx_p = 1000000 / pinv;
- dccp_timestamp(sk, &now);
-
+ now = ktime_get_real();
/*
* Calculate new round trip sample as per [RFC 3448, 4.3] by
* R_sample = (now - t_recvdata) - t_elapsed
*/
- r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
+ r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp));
/*
* Update RTT estimate by
@@ -561,8 +545,6 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct ccid3_options_received *opt_recv;
- BUG_ON(hctx == NULL);
-
opt_recv = &hctx->ccid3hctx_options_received;
if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
@@ -636,8 +618,6 @@ static void ccid3_hc_tx_exit(struct sock *sk)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- BUG_ON(hctx == NULL);
-
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
@@ -647,14 +627,13 @@ static void ccid3_hc_tx_exit(struct sock *sk)
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ struct ccid3_hc_tx_sock *hctx;
/* Listen socks doesn't have a private CCID block */
if (sk->sk_state == DCCP_LISTEN)
return;
- BUG_ON(hctx == NULL);
-
+ hctx = ccid3_hc_tx_sk(sk);
info->tcpi_rto = hctx->ccid3hctx_t_rto;
info->tcpi_rtt = hctx->ccid3hctx_rtt;
}
@@ -662,13 +641,14 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ const struct ccid3_hc_tx_sock *hctx;
const void *val;
/* Listen socks doesn't have a private CCID block */
if (sk->sk_state == DCCP_LISTEN)
return -EINVAL;
+ hctx = ccid3_hc_tx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_TX_INFO:
if (len < sizeof(hctx->ccid3hctx_tfrc))
@@ -729,20 +709,20 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_rx_hist_entry *packet;
- struct timeval now;
+ ktime_t now;
suseconds_t delta;
ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
- dccp_timestamp(sk, &now);
+ now = ktime_get_real();
switch (hcrx->ccid3hcrx_state) {
case TFRC_RSTATE_NO_DATA:
hcrx->ccid3hcrx_x_recv = 0;
break;
case TFRC_RSTATE_DATA:
- delta = timeval_delta(&now,
- &hcrx->ccid3hcrx_tstamp_last_feedback);
+ delta = ktime_us_delta(now,
+ hcrx->ccid3hcrx_tstamp_last_feedback);
DCCP_BUG_ON(delta < 0);
hcrx->ccid3hcrx_x_recv =
scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
@@ -764,7 +744,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
hcrx->ccid3hcrx_bytes_recv = 0;
/* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
- delta = timeval_delta(&now, &packet->dccphrx_tstamp);
+ delta = ktime_us_delta(now, packet->dccphrx_tstamp);
DCCP_BUG_ON(delta < 0);
hcrx->ccid3hcrx_elapsed_time = delta / 10;
@@ -782,14 +762,13 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
- const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+ const struct ccid3_hc_rx_sock *hcrx;
__be32 x_recv, pinv;
- BUG_ON(hcrx == NULL);
-
if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return 0;
+ hcrx = ccid3_hc_rx_sk(sk);
DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
if (dccp_packet_without_ack(skb))
@@ -839,7 +818,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
dccp_li_update_li(sk,
&hcrx->ccid3hcrx_li_hist,
&hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_tstamp_last_feedback,
+ hcrx->ccid3hcrx_tstamp_last_feedback,
hcrx->ccid3hcrx_s,
hcrx->ccid3hcrx_bytes_recv,
hcrx->ccid3hcrx_x_recv,
@@ -876,11 +855,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
const struct dccp_options_received *opt_recv;
struct dccp_rx_hist_entry *packet;
- struct timeval now;
u32 p_prev, r_sample, rtt_prev;
int loss, payload_size;
-
- BUG_ON(hcrx == NULL);
+ ktime_t now;
opt_recv = &dccp_sk(sk)->dccps_options_received;
@@ -891,9 +868,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
case DCCP_PKT_DATAACK:
if (opt_recv->dccpor_timestamp_echo == 0)
break;
+ r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
rtt_prev = hcrx->ccid3hcrx_rtt;
- dccp_timestamp(sk, &now);
- r_sample = dccp_sample_rtt(sk, &now, NULL);
+ r_sample = dccp_sample_rtt(sk, 10 * r_sample);
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
hcrx->ccid3hcrx_rtt = r_sample;
@@ -912,7 +889,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
return;
}
- packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
+ packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
skb, GFP_ATOMIC);
if (unlikely(packet == NULL)) {
DCCP_WARN("%s(%p), Not enough mem to add rx packet "
@@ -941,9 +918,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (loss)
break;
- dccp_timestamp(sk, &now);
- if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
- (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
+ now = ktime_get_real();
+ if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) -
+ (s64)hcrx->ccid3hcrx_rtt) >= 0) {
hcrx->ccid3hcrx_tstamp_last_ack = now;
ccid3_hc_rx_send_feedback(sk);
}
@@ -984,8 +961,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
- dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
- hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
+ hcrx->ccid3hcrx_tstamp_last_feedback =
+ hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
hcrx->ccid3hcrx_s = 0;
hcrx->ccid3hcrx_rtt = 0;
return 0;
@@ -995,8 +972,6 @@ static void ccid3_hc_rx_exit(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- BUG_ON(hcrx == NULL);
-
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
/* Empty packet history */
@@ -1008,14 +983,13 @@ static void ccid3_hc_rx_exit(struct sock *sk)
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
- const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+ const struct ccid3_hc_rx_sock *hcrx;
/* Listen socks doesn't have a private CCID block */
if (sk->sk_state == DCCP_LISTEN)
return;
- BUG_ON(hcrx == NULL);
-
+ hcrx = ccid3_hc_rx_sk(sk);
info->tcpi_ca_state = hcrx->ccid3hcrx_state;
info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
@@ -1024,13 +998,14 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
- const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+ const struct ccid3_hc_rx_sock *hcrx;
const void *val;
/* Listen socks doesn't have a private CCID block */
if (sk->sk_state == DCCP_LISTEN)
return -EINVAL;
+ hcrx = ccid3_hc_rx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_RX_INFO:
if (len < sizeof(hcrx->ccid3hcrx_tfrc))
@@ -1071,7 +1046,7 @@ static struct ccid_operations ccid3 = {
};
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-module_param(ccid3_debug, int, 0444);
+module_param(ccid3_debug, bool, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
#endif
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 51d4b804e33..0cdc982cfe4 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -38,7 +38,6 @@
#include <linux/ktime.h>
#include <linux/list.h>
-#include <linux/time.h>
#include <linux/types.h>
#include <linux/tfrc.h>
#include "../ccid.h"
@@ -111,13 +110,20 @@ struct ccid3_hc_tx_sock {
u8 ccid3hctx_idle;
ktime_t ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
- struct timeval ccid3hctx_t_ld;
+ ktime_t ccid3hctx_t_ld;
ktime_t ccid3hctx_t_nom;
u32 ccid3hctx_delta;
struct list_head ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
+static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
+{
+ struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
+ BUG_ON(hctx == NULL);
+ return hctx;
+}
+
/* TFRC receiver states */
enum ccid3_hc_rx_states {
TFRC_RSTATE_NO_DATA = 1,
@@ -153,8 +159,8 @@ struct ccid3_hc_rx_sock {
ccid3hcrx_ccval_last_counter:4;
enum ccid3_hc_rx_states ccid3hcrx_state:8;
u32 ccid3hcrx_bytes_recv;
- struct timeval ccid3hcrx_tstamp_last_feedback;
- struct timeval ccid3hcrx_tstamp_last_ack;
+ ktime_t ccid3hcrx_tstamp_last_feedback;
+ ktime_t ccid3hcrx_tstamp_last_ack;
struct list_head ccid3hcrx_hist;
struct list_head ccid3hcrx_li_hist;
u16 ccid3hcrx_s;
@@ -162,14 +168,11 @@ struct ccid3_hc_rx_sock {
u32 ccid3hcrx_elapsed_time;
};
-static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
-{
- return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
-}
-
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
{
- return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
+ struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
+ BUG_ON(hcrx == NULL);
+ return hcrx;
}
#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 174d3f13d93..40ad428a27f 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -125,14 +125,14 @@ static int dccp_li_hist_interval_new(struct list_head *list,
* returns estimated loss interval in usecs */
static u32 dccp_li_calc_first_li(struct sock *sk,
struct list_head *hist_list,
- struct timeval *last_feedback,
+ ktime_t last_feedback,
u16 s, u32 bytes_recv,
u32 previous_x_recv)
{
struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
u32 x_recv, p;
suseconds_t rtt, delta;
- struct timeval tstamp = { 0, 0 };
+ ktime_t tstamp = ktime_set(0, 0);
int interval = 0;
int win_count = 0;
int step = 0;
@@ -176,7 +176,7 @@ found:
return ~0;
}
- delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+ delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp);
DCCP_BUG_ON(delta < 0);
rtt = delta * 4 / interval;
@@ -196,8 +196,7 @@ found:
return ~0;
}
- dccp_timestamp(sk, &tstamp);
- delta = timeval_delta(&tstamp, last_feedback);
+ delta = ktime_us_delta(ktime_get_real(), last_feedback);
DCCP_BUG_ON(delta <= 0);
x_recv = scaled_div32(bytes_recv, delta);
@@ -226,7 +225,7 @@ found:
void dccp_li_update_li(struct sock *sk,
struct list_head *li_hist_list,
struct list_head *hist_list,
- struct timeval *last_feedback, u16 s, u32 bytes_recv,
+ ktime_t last_feedback, u16 s, u32 bytes_recv,
u32 previous_x_recv, u64 seq_loss, u8 win_loss)
{
struct dccp_li_hist_entry *head;
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 906c806d6d9..27bee92dae1 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -13,8 +13,8 @@
* any later version.
*/
+#include <linux/ktime.h>
#include <linux/list.h>
-#include <linux/time.h>
extern void dccp_li_hist_purge(struct list_head *list);
@@ -23,7 +23,7 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
extern void dccp_li_update_li(struct sock *sk,
struct list_head *li_hist_list,
struct list_head *hist_list,
- struct timeval *last_feedback, u16 s,
+ ktime_t last_feedback, u16 s,
u32 bytes_recv, u32 previous_x_recv,
u64 seq_loss, u8 win_loss);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 60d00f01539..032bb61c6e3 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -37,9 +37,9 @@
#ifndef _DCCP_PKT_HIST_
#define _DCCP_PKT_HIST_
+#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <linux/time.h>
#include "../../dccp.h"
@@ -57,7 +57,7 @@ struct dccp_tx_hist_entry {
u64 dccphtx_seqno:48,
dccphtx_sent:1;
u32 dccphtx_rtt;
- struct timeval dccphtx_tstamp;
+ ktime_t dccphtx_tstamp;
};
struct dccp_tx_hist {
@@ -124,7 +124,7 @@ struct dccp_rx_hist_entry {
dccphrx_ccval:4,
dccphrx_type:4;
u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
- struct timeval dccphrx_tstamp;
+ ktime_t dccphrx_tstamp;
};
struct dccp_rx_hist {
@@ -136,7 +136,6 @@ extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
static inline struct dccp_rx_hist_entry *
dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
- const struct sock *sk,
const u32 ndp,
const struct sk_buff *skb,
const gfp_t prio)
@@ -151,7 +150,7 @@ static inline struct dccp_rx_hist_entry *
entry->dccphrx_ccval = dh->dccph_ccval;
entry->dccphrx_type = dh->dccph_type;
entry->dccphrx_ndp = ndp;
- dccp_timestamp(sk, &entry->dccphrx_tstamp);
+ entry->dccphrx_tstamp = ktime_get_real();
}
return entry;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e2d74cd7eee..ee97950d77d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -13,6 +13,7 @@
*/
#include <linux/dccp.h>
+#include <linux/ktime.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -91,6 +92,7 @@ extern int sysctl_dccp_feat_ack_ratio;
extern int sysctl_dccp_feat_send_ack_vector;
extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
+extern int sysctl_dccp_sync_ratelimit;
/*
* 48-bit sequence number arithmetic (signed and unsigned)
@@ -208,7 +210,6 @@ extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern void dccp_send_ack(struct sock *sk);
-extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
extern void dccp_send_sync(struct sock *sk, const u64 seq,
@@ -293,11 +294,12 @@ extern unsigned int dccp_poll(struct file *file, struct socket *sock,
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
+extern struct sk_buff *dccp_ctl_make_reset(struct socket *ctl,
+ struct sk_buff *skb);
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
-extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
- struct timeval *t_history);
+extern u32 dccp_sample_rtt(struct sock *sk, long delta);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
@@ -309,10 +311,22 @@ static inline int dccp_bad_service_code(const struct sock *sk,
return !dccp_list_has_service(dp->dccps_service_list, service);
}
+/**
+ * dccp_skb_cb - DCCP per-packet control information
+ * @dccpd_type: one of %dccp_pkt_type (or unknown)
+ * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1
+ * @dccpd_reset_code: one of %dccp_reset_codes
+ * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code)
+ * @dccpd_opt_len: total length of all options (5.8) in the packet
+ * @dccpd_seq: sequence number
+ * @dccpd_ack_seq: acknowledgment number subheader field value
+ * This is used for transmission as well as for reception.
+ */
struct dccp_skb_cb {
__u8 dccpd_type:4;
__u8 dccpd_ccval:4;
- __u8 dccpd_reset_code;
+ __u8 dccpd_reset_code,
+ dccpd_reset_data[3];
__u16 dccpd_opt_len;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
@@ -395,53 +409,14 @@ extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
+extern u32 dccp_timestamp(void);
+extern void dccp_timestamping_init(void);
extern int dccp_insert_option_timestamp(struct sock *sk,
struct sk_buff *skb);
extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
-extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
-
-static inline suseconds_t timeval_usecs(const struct timeval *tv)
-{
- return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
-}
-
-static inline suseconds_t timeval_delta(const struct timeval *large,
- const struct timeval *small)
-{
- time_t secs = large->tv_sec - small->tv_sec;
- suseconds_t usecs = large->tv_usec - small->tv_usec;
-
- if (usecs < 0) {
- secs--;
- usecs += USEC_PER_SEC;
- }
- return secs * USEC_PER_SEC + usecs;
-}
-
-static inline void timeval_add_usecs(struct timeval *tv,
- const suseconds_t usecs)
-{
- tv->tv_usec += usecs;
- while (tv->tv_usec >= USEC_PER_SEC) {
- tv->tv_sec++;
- tv->tv_usec -= USEC_PER_SEC;
- }
-}
-
-static inline void timeval_sub_usecs(struct timeval *tv,
- const suseconds_t usecs)
-{
- tv->tv_usec -= usecs;
- while (tv->tv_usec < 0) {
- tv->tv_sec--;
- tv->tv_usec += USEC_PER_SEC;
- }
- DCCP_BUG_ON(tv->tv_sec < 0);
-}
-
#ifdef CONFIG_SYSCTL
extern int dccp_sysctl_init(void);
extern void dccp_sysctl_exit(void);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index da6ec185ed5..19d7e1dbd87 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -68,7 +68,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
struct dccp_sock *dp = dccp_sk(sk);
- u64 lswl, lawl;
+ u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq,
+ ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
/*
* Step 5: Prepare sequence numbers for Sync
@@ -84,11 +85,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
*/
if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
- if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
- dp->dccps_awl, dp->dccps_awh) &&
- dccp_delta_seqno(dp->dccps_swl,
- DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
- dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+ if (between48(ackno, dp->dccps_awl, dp->dccps_awh) &&
+ dccp_delta_seqno(dp->dccps_swl, seqno) >= 0)
+ dccp_update_gsr(sk, seqno);
else
return -1;
}
@@ -103,9 +102,6 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
* Update S.GSR, S.SWL, S.SWH
* If P.type != Sync,
* Update S.GAR
- * Otherwise,
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
*/
lswl = dp->dccps_swl;
lawl = dp->dccps_awl;
@@ -113,35 +109,52 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
dh->dccph_type == DCCP_PKT_CLOSE ||
dh->dccph_type == DCCP_PKT_RESET) {
- lswl = dp->dccps_gsr;
- dccp_inc_seqno(&lswl);
+ lswl = ADD48(dp->dccps_gsr, 1);
lawl = dp->dccps_gar;
}
- if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
- (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
- between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
- lawl, dp->dccps_awh))) {
- dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+ if (between48(seqno, lswl, dp->dccps_swh) &&
+ (ackno == DCCP_PKT_WITHOUT_ACK_SEQ ||
+ between48(ackno, lawl, dp->dccps_awh))) {
+ dccp_update_gsr(sk, seqno);
if (dh->dccph_type != DCCP_PKT_SYNC &&
- (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
- DCCP_PKT_WITHOUT_ACK_SEQ))
- dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
+ (ackno != DCCP_PKT_WITHOUT_ACK_SEQ))
+ dp->dccps_gar = ackno;
} else {
+ unsigned long now = jiffies;
+ /*
+ * Step 6: Check sequence numbers
+ * Otherwise,
+ * If P.type == Reset,
+ * Send Sync packet acknowledging S.GSR
+ * Otherwise,
+ * Send Sync packet acknowledging P.seqno
+ * Drop packet and return
+ *
+ * These Syncs are rate-limited as per RFC 4340, 7.5.4:
+ * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second.
+ */
+ if (time_before(now, (dp->dccps_rate_last +
+ sysctl_dccp_sync_ratelimit)))
+ return 0;
+
DCCP_WARN("DCCP: Step 6 failed for %s packet, "
"(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
"(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
"sending SYNC...\n", dccp_packet_name(dh->dccph_type),
- (unsigned long long) lswl,
- (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq,
+ (unsigned long long) lswl, (unsigned long long) seqno,
(unsigned long long) dp->dccps_swh,
- (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
- DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
- (unsigned long long) lawl,
- (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist"
+ : "exists",
+ (unsigned long long) lawl, (unsigned long long) ackno,
(unsigned long long) dp->dccps_awh);
- dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
+
+ dp->dccps_rate_last = now;
+
+ if (dh->dccph_type == DCCP_PKT_RESET)
+ seqno = dp->dccps_gsr;
+ dccp_send_sync(sk, seqno, DCCP_PKT_SYNC);
return -1;
}
@@ -280,6 +293,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
if (dh->dccph_type == DCCP_PKT_RESPONSE) {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
+ long tstamp = dccp_timestamp();
/* Stop the REQUEST timer */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -300,13 +314,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
if (dccp_parse_options(sk, skb))
goto out_invalid_packet;
- /* Obtain RTT sample from SYN exchange (used by CCID 3) */
- if (dp->dccps_options_received.dccpor_timestamp_echo) {
- struct timeval now;
-
- dccp_timestamp(sk, &now);
- dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
- }
+ /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
+ if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
+ dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
+ dp->dccps_options_received.dccpor_timestamp_echo));
if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
@@ -540,11 +551,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 0;
}
- if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
- dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
- goto discard;
- }
-
switch (sk->sk_state) {
case DCCP_CLOSED:
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
@@ -575,6 +581,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
sk_wake_async(sk, 0, POLL_OUT);
break;
}
+ } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
+ dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
+ goto discard;
}
if (!queued) {
@@ -587,37 +596,22 @@ discard:
EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
/**
- * dccp_sample_rtt - Sample RTT from packet exchange
- *
- * @sk: connected dccp_sock
- * @t_recv: receive timestamp of packet with timestamp echo
- * @t_hist: packet history timestamp or NULL
+ * dccp_sample_rtt - Validate and finalise computation of RTT sample
+ * @delta: number of microseconds between packet and acknowledgment
+ * The routine is kept generic to work in different contexts. It should be
+ * called immediately when the ACK used for the RTT sample arrives.
*/
-u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
- struct timeval *t_hist)
+u32 dccp_sample_rtt(struct sock *sk, long delta)
{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_options_received *or = &dp->dccps_options_received;
- suseconds_t delta;
-
- if (t_hist == NULL) {
- if (!or->dccpor_timestamp_echo) {
- DCCP_WARN("packet without timestamp echo\n");
- return DCCP_SANE_RTT_MAX;
- }
- timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
- delta = timeval_usecs(t_recv);
- } else
- delta = timeval_delta(t_recv, t_hist);
-
- delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */
+ /* dccpor_elapsed_time is either zeroed out or set and > 0 */
+ delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
if (unlikely(delta <= 0)) {
- DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
+ DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
return DCCP_SANE_RTT_MIN;
}
- if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
- DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
+ if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
+ DCCP_WARN("RTT sample %ld too large, using max\n", delta);
return DCCP_SANE_RTT_MAX;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 718f2fa923a..44f6e17e105 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -381,7 +381,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
{
struct inet_request_sock *ireq;
struct inet_sock *newinet;
- struct dccp_sock *newdp;
struct sock *newsk;
if (sk_acceptq_is_full(sk))
@@ -396,7 +395,6 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
sk_setup_caps(newsk, dst);
- newdp = dccp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
newinet->daddr = ireq->rmt_addr;
@@ -512,17 +510,12 @@ out:
static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
int err;
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
const struct iphdr *rxiph;
- const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct dst_entry *dst;
- u64 seqno = 0;
/* Never send a reset in response to a reset. */
- if (rxdh->dccph_type == DCCP_PKT_RESET)
+ if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
return;
if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
@@ -532,37 +525,14 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
if (dst == NULL)
return;
- skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header,
- GFP_ATOMIC);
+ skb = dccp_ctl_make_reset(dccp_v4_ctl_socket, rxskb);
if (skb == NULL)
goto out;
- /* Reserve space for headers. */
- skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
- skb->dst = dst_clone(dst);
-
- dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
-
- /* Build DCCP header and checksum it. */
- dh->dccph_type = DCCP_PKT_RESET;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_reset_len / 4;
- dh->dccph_x = 1;
- dccp_hdr_reset(skb)->dccph_reset_code =
- DCCP_SKB_CB(rxskb)->dccpd_reset_code;
-
- /* See "8.3.1. Abnormal Termination" in RFC 4340 */
- if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
-
- dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- dccp_csum_outgoing(skb);
rxiph = ip_hdr(rxskb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
- rxiph->daddr);
+ dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
+ rxiph->daddr);
+ skb->dst = dst_clone(dst);
bh_lock_sock(dccp_v4_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
@@ -598,17 +568,14 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct dccp_request_sock *dreq;
const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST)) {
- reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- goto drop;
- }
+ (RTCF_BROADCAST | RTCF_MULTICAST))
+ return 0; /* discard, don't send a reset here */
if (dccp_bad_service_code(sk, service)) {
- reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+ dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
}
/*
@@ -616,6 +583,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* limitations, they conserve resources and peer is
* evidently real one.
*/
+ dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
if (inet_csk_reqsk_queue_is_full(sk))
goto drop;
@@ -668,7 +636,6 @@ drop_and_free:
reqsk_free(req);
drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
- dcb->dccpd_reset_code = reset_code;
return -1;
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b158c661867..006a3834fbc 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -301,50 +301,23 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
struct ipv6hdr *rxip6h;
- const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
struct flowi fl;
- u64 seqno = 0;
- if (rxdh->dccph_type == DCCP_PKT_RESET)
+ if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
return;
if (!ipv6_unicast_destination(rxskb))
return;
- skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
- GFP_ATOMIC);
+ skb = dccp_ctl_make_reset(dccp_v6_ctl_socket, rxskb);
if (skb == NULL)
return;
- skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
-
- dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
-
- /* Swap the send and the receive. */
- dh->dccph_type = DCCP_PKT_RESET;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_reset_len / 4;
- dh->dccph_x = 1;
- dccp_hdr_reset(skb)->dccph_reset_code =
- DCCP_SKB_CB(rxskb)->dccpd_reset_code;
-
- /* See "8.3.1. Abnormal Termination" in RFC 4340 */
- if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
-
- dccp_hdr_set_seq(dh, seqno);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
-
- dccp_csum_outgoing(skb);
rxip6h = ipv6_hdr(rxskb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
- &rxip6h->daddr);
+ dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+ &rxip6h->daddr);
memset(&fl, 0, sizeof(fl));
ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
@@ -352,8 +325,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
fl.proto = IPPROTO_DCCP;
fl.oif = inet6_iif(rxskb);
- fl.fl_ip_dport = dh->dccph_dport;
- fl.fl_ip_sport = dh->dccph_sport;
+ fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport;
+ fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport;
security_skb_classify_flow(rxskb, &fl);
/* sk = NULL, but it is safe for now. RST socket required. */
@@ -417,21 +390,21 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct ipv6_pinfo *np = inet6_sk(sk);
const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
if (skb->protocol == htons(ETH_P_IP))
return dccp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
- goto drop;
+ return 0; /* discard, don't send a reset here */
if (dccp_bad_service_code(sk, service)) {
- reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+ dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
}
/*
* There are no SYN attacks on IPv6, yet...
*/
+ dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
if (inet_csk_reqsk_queue_is_full(sk))
goto drop;
@@ -491,7 +464,6 @@ drop_and_free:
reqsk_free(req);
drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
- dcb->dccpd_reset_code = reset_code;
return -1;
}
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e18e249ac49..831b76e08d0 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = {
EXPORT_SYMBOL_GPL(dccp_death_row);
+void dccp_minisock_init(struct dccp_minisock *dmsk)
+{
+ dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
+ dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
+ dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
+ dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
+ dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
+ dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
+}
+
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
struct inet_timewait_sock *tw = NULL;
@@ -112,7 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
newdp->dccps_service_list = NULL;
newdp->dccps_service = dreq->dreq_service;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
- do_gettimeofday(&newdp->dccps_epoch);
if (dccp_feat_clone(sk, newsk))
goto out_free;
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 34d536d5f1a..d361b553330 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,16 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-void dccp_minisock_init(struct dccp_minisock *dmsk)
-{
- dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
- dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
- dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
- dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
- dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
- dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
-}
-
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
u32 value = 0;
@@ -158,7 +148,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value);
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
- dccp_timestamp(sk, &dp->dccps_timestamp_time);
+ dp->dccps_timestamp_time = ktime_get_real();
dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
dccp_role(sk), opt_recv->dccpor_timestamp,
@@ -189,7 +179,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
else
elapsed_time = ntohl(*(__be32 *)(value + 4));
- dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+ dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time);
/* Give precedence to the biggest ELAPSED_TIME */
if (elapsed_time > opt_recv->dccpor_elapsed_time)
@@ -370,29 +360,9 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
-void dccp_timestamp(const struct sock *sk, struct timeval *tv)
-{
- const struct dccp_sock *dp = dccp_sk(sk);
-
- do_gettimeofday(tv);
- tv->tv_sec -= dp->dccps_epoch.tv_sec;
- tv->tv_usec -= dp->dccps_epoch.tv_usec;
-
- while (tv->tv_usec < 0) {
- tv->tv_sec--;
- tv->tv_usec += USEC_PER_SEC;
- }
-}
-
-EXPORT_SYMBOL_GPL(dccp_timestamp);
-
int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
- struct timeval tv;
- __be32 now;
-
- dccp_timestamp(sk, &tv);
- now = htonl(timeval_usecs(&tv) / 10);
+ __be32 now = htonl(dccp_timestamp());
/* yes this will overflow but that is the point as we want a
* 10 usec 32 bit timer which mean it wraps every 11.9 hours */
@@ -405,14 +375,12 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct timeval now;
__be32 tstamp_echo;
- u32 elapsed_time;
int len, elapsed_time_len;
unsigned char *to;
-
- dccp_timestamp(sk, &now);
- elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
+ const suseconds_t delta = ktime_us_delta(ktime_get_real(),
+ dp->dccps_timestamp_time);
+ u32 elapsed_time = delta / 10;
elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
len = 6 + elapsed_time_len;
@@ -438,8 +406,7 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
}
dp->dccps_timestamp_echo = 0;
- dp->dccps_timestamp_time.tv_sec = 0;
- dp->dccps_timestamp_time.tv_usec = 0;
+ dp->dccps_timestamp_time = ktime_set(0, 0);
return 0;
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index c8d843e983f..f49544618f2 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -61,6 +61,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
set_ack = 0;
/* fall through */
case DCCP_PKT_DATAACK:
+ case DCCP_PKT_RESET:
break;
case DCCP_PKT_REQUEST:
@@ -69,12 +70,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
case DCCP_PKT_SYNC:
case DCCP_PKT_SYNCACK:
- ackno = dcb->dccpd_seq;
+ ackno = dcb->dccpd_ack_seq;
/* fall through */
default:
/*
- * Only data packets should come through with skb->sk
- * set.
+ * Set owner/destructor: some skbs are allocated via
+ * alloc_skb (e.g. when retransmission may happen).
+ * Only Data, DataAck, and Reset packets should come
+ * through here with skb->sk set.
*/
WARN_ON(skb->sk);
skb_set_owner_w(skb, sk);
@@ -174,34 +177,38 @@ void dccp_write_space(struct sock *sk)
/**
* dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
- * @sk: socket to wait for
+ * @sk: socket to wait for
+ * @skb: current skb to pass on for waiting
+ * @delay: sleep timeout in milliseconds (> 0)
+ * This function is called by default when the socket is closed, and
+ * when a non-zero linger time is set on the socket. For consistency
*/
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
{
struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
- unsigned long delay;
+ unsigned long jiffdelay;
int rc;
- while (1) {
+ do {
+ dccp_pr_debug("delayed send by %d msec\n", delay);
+ jiffdelay = msecs_to_jiffies(delay);
+
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ sk->sk_write_pending++;
+ release_sock(sk);
+ schedule_timeout(jiffdelay);
+ lock_sock(sk);
+ sk->sk_write_pending--;
+
if (sk->sk_err)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
- if (rc <= 0)
- break;
- dccp_pr_debug("delayed send by %d msec\n", rc);
- delay = msecs_to_jiffies(rc);
- sk->sk_write_pending++;
- release_sock(sk);
- schedule_timeout(delay);
- lock_sock(sk);
- sk->sk_write_pending--;
- }
+ } while ((delay = rc) > 0);
out:
finish_wait(sk->sk_sleep, &wait);
return rc;
@@ -228,7 +235,7 @@ void dccp_write_xmit(struct sock *sk, int block)
msecs_to_jiffies(err)+jiffies);
break;
} else
- err = dccp_wait_for_ccid(sk, skb);
+ err = dccp_wait_for_ccid(sk, skb, err);
if (err && err != -EINTR)
DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
}
@@ -324,72 +331,81 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
EXPORT_SYMBOL_GPL(dccp_make_response);
-static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
- const enum dccp_reset_codes code)
+/* answer offending packet in @rcv_skb with Reset from control socket @ctl */
+struct sk_buff *dccp_ctl_make_reset(struct socket *ctl, struct sk_buff *rcv_skb)
{
- struct dccp_hdr *dh;
- struct dccp_sock *dp = dccp_sk(sk);
- const u32 dccp_header_size = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_reset);
- struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
- GFP_ATOMIC);
- if (skb == NULL)
- return NULL;
-
- /* Reserve space for headers. */
- skb_reserve(skb, sk->sk_prot->max_header);
-
- skb->dst = dst_clone(dst);
-
- dccp_inc_seqno(&dp->dccps_gss);
-
- DCCP_SKB_CB(skb)->dccpd_reset_code = code;
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
- DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
+ struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb);
+ const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
+ sizeof(struct dccp_hdr_ext) +
+ sizeof(struct dccp_hdr_reset);
+ struct dccp_hdr_reset *dhr;
+ struct sk_buff *skb;
- if (dccp_insert_options(sk, skb)) {
- kfree_skb(skb);
+ skb = alloc_skb(ctl->sk->sk_prot->max_header, GFP_ATOMIC);
+ if (skb == NULL)
return NULL;
- }
- dh = dccp_zeroed_hdr(skb, dccp_header_size);
+ skb_reserve(skb, ctl->sk->sk_prot->max_header);
- dh->dccph_sport = inet_sk(sk)->sport;
- dh->dccph_dport = inet_sk(sk)->dport;
- dh->dccph_doff = (dccp_header_size +
- DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+ /* Swap the send and the receive. */
+ dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
dh->dccph_type = DCCP_PKT_RESET;
+ dh->dccph_sport = rxdh->dccph_dport;
+ dh->dccph_dport = rxdh->dccph_sport;
+ dh->dccph_doff = dccp_hdr_reset_len / 4;
dh->dccph_x = 1;
- dccp_hdr_set_seq(dh, dp->dccps_gss);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
- dccp_hdr_reset(skb)->dccph_reset_code = code;
- inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb);
+ dhr = dccp_hdr_reset(skb);
+ dhr->dccph_reset_code = dcb->dccpd_reset_code;
+
+ switch (dcb->dccpd_reset_code) {
+ case DCCP_RESET_CODE_PACKET_ERROR:
+ dhr->dccph_reset_data[0] = rxdh->dccph_type;
+ break;
+ case DCCP_RESET_CODE_OPTION_ERROR: /* fall through */
+ case DCCP_RESET_CODE_MANDATORY_ERROR:
+ memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3);
+ break;
+ }
+ /*
+ * From RFC 4340, 8.3.1:
+ * If P.ackno exists, set R.seqno := P.ackno + 1.
+ * Else set R.seqno := 0.
+ */
+ if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+ dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1));
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq);
- DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+ dccp_csum_outgoing(skb);
return skb;
}
+EXPORT_SYMBOL_GPL(dccp_ctl_make_reset);
+
+/* send Reset on established socket, to close or abort the connection */
int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
+ struct sk_buff *skb;
/*
* FIXME: what if rebuild_header fails?
* Should we be doing a rebuild_header here?
*/
int err = inet_sk_rebuild_header(sk);
- if (err == 0) {
- struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache,
- code);
- if (skb != NULL) {
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
- return net_xmit_eval(err);
- }
- }
+ if (err != 0)
+ return err;
+
+ skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOBUFS;
- return err;
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(skb, sk->sk_prot->max_header);
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
+ DCCP_SKB_CB(skb)->dccpd_reset_code = code;
+
+ return dccp_transmit_skb(sk, skb);
}
/*
@@ -477,6 +493,7 @@ void dccp_send_ack(struct sock *sk)
EXPORT_SYMBOL_GPL(dccp_send_ack);
+/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */
void dccp_send_delayed_ack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -507,7 +524,7 @@ void dccp_send_delayed_ack(struct sock *sk)
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
-void dccp_send_sync(struct sock *sk, const u64 seq,
+void dccp_send_sync(struct sock *sk, const u64 ackno,
const enum dccp_pkt_type pkt_type)
{
/*
@@ -517,14 +534,16 @@ void dccp_send_sync(struct sock *sk, const u64 seq,
*/
struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
- if (skb == NULL)
+ if (skb == NULL) {
/* FIXME: how to make sure the sync is sent? */
+ DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type));
return;
+ }
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
- DCCP_SKB_CB(skb)->dccpd_seq = seq;
+ DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
dccp_transmit_skb(sk, skb);
}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index bae10b0f2fc..7053bb827bc 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/vmalloc.h>
+#include <net/net_namespace.h>
#include "dccp.h"
#include "ccid.h"
@@ -168,7 +169,7 @@ static __init int dccpprobe_init(void)
if (IS_ERR(dccpw.fifo))
return PTR_ERR(dccpw.fifo);
- if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
+ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
goto err0;
ret = register_jprobe(&dccp_send_probe);
@@ -178,7 +179,7 @@ static __init int dccpprobe_init(void)
pr_info("DCCP watch registered (port=%d)\n", port);
return 0;
err1:
- proc_net_remove(procname);
+ proc_net_remove(&init_net, procname);
err0:
kfifo_free(dccpw.fifo);
return ret;
@@ -188,7 +189,7 @@ module_init(dccpprobe_init);
static __exit void dccpprobe_exit(void)
{
kfifo_free(dccpw.fifo);
- proc_net_remove(procname);
+ proc_net_remove(&init_net, procname);
unregister_jprobe(&dccp_send_probe);
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 04b59ec4f51..cc9bf1cb264 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -172,7 +172,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_minisock_init(&dp->dccps_minisock);
- do_gettimeofday(&dp->dccps_epoch);
/*
* FIXME: We're hardcoding the CCID, and doing this at this point makes
@@ -220,6 +219,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
sk->sk_write_space = dccp_write_space;
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
+ dp->dccps_rate_last = jiffies;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
@@ -587,6 +587,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_SERVICE:
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
+ case DCCP_SOCKOPT_GET_CUR_MPS:
+ val = dp->dccps_mss_cache;
+ len = sizeof(val);
+ break;
case DCCP_SOCKOPT_SEND_CSCOV:
val = dp->dccps_pcslen;
len = sizeof(val);
@@ -664,7 +668,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* so that the trick in dccp_rcv_request_sent_state_process.
*/
/* Wait for a connection to finish. */
- if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
+ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_release;
@@ -988,7 +992,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
-module_param(dccp_debug, int, 0444);
+module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
EXPORT_SYMBOL_GPL(dccp_debug);
@@ -1077,6 +1081,8 @@ static int __init dccp_init(void)
rc = dccp_sysctl_init();
if (rc)
goto out_ackvec_exit;
+
+ dccp_timestamping_init();
out:
return rc;
out_ackvec_exit:
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 1260aabac5e..9364b2fb4db 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,6 +18,9 @@
#error This file should not be compiled without CONFIG_SYSCTL defined
#endif
+/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
+int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
+
static struct ctl_table dccp_default_table[] = {
{
.procname = "seq_window",
@@ -89,6 +92,13 @@ static struct ctl_table dccp_default_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "sync_ratelimit",
+ .data = &sysctl_dccp_sync_ratelimit,
+ .maxlen = sizeof(sysctl_dccp_sync_ratelimit),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
{ .ctl_name = 0, }
};
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 0197a41c256..3af067354bd 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -291,3 +291,24 @@ void dccp_init_xmit_timers(struct sock *sk)
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
+
+static ktime_t dccp_timestamp_seed;
+/**
+ * dccp_timestamp - 10s of microseconds time source
+ * Returns the number of 10s of microseconds since loading DCCP. This is native
+ * DCCP time difference format (RFC 4340, sec. 13).
+ * Please note: This will wrap around about circa every 11.9 hours.
+ */
+u32 dccp_timestamp(void)
+{
+ s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+
+ do_div(delta, 10);
+ return delta;
+}
+EXPORT_SYMBOL_GPL(dccp_timestamp);
+
+void __init dccp_timestamping_init(void)
+{
+ dccp_timestamp_seed = ktime_get_real();
+}