summaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2005-08-29 17:36:46 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-08-29 17:36:46 -0700
commit826509f8110049663799bc20f2b5b6170e2f78ca (patch)
tree9d8823cf283592625b882125b4b13988f2934cc6 /net/dccp
parentd992895ba2b27cf5adf1ba0ad6d27662adc54c5e (diff)
parentc530cfb1ce1e8f230744c3f3bd86771f50725053 (diff)
Merge HEAD from master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.git
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig50
-rw-r--r--net/dccp/Makefile10
-rw-r--r--net/dccp/ccid.c139
-rw-r--r--net/dccp/ccid.h180
-rw-r--r--net/dccp/ccids/Kconfig29
-rw-r--r--net/dccp/ccids/Makefile5
-rw-r--r--net/dccp/ccids/ccid3.c1221
-rw-r--r--net/dccp/ccids/ccid3.h137
-rw-r--r--net/dccp/ccids/lib/Makefile3
-rw-r--r--net/dccp/ccids/lib/loss_interval.c144
-rw-r--r--net/dccp/ccids/lib/loss_interval.h61
-rw-r--r--net/dccp/ccids/lib/packet_history.c398
-rw-r--r--net/dccp/ccids/lib/packet_history.h199
-rw-r--r--net/dccp/ccids/lib/tfrc.h22
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c644
-rw-r--r--net/dccp/dccp.h493
-rw-r--r--net/dccp/diag.c71
-rw-r--r--net/dccp/input.c600
-rw-r--r--net/dccp/ipv4.c1356
-rw-r--r--net/dccp/minisocks.c264
-rw-r--r--net/dccp/options.c855
-rw-r--r--net/dccp/output.c528
-rw-r--r--net/dccp/proto.c826
-rw-r--r--net/dccp/timer.c255
24 files changed, 8490 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644
index 00000000000..187ac182e24
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,50 @@
+menu "DCCP Configuration (EXPERIMENTAL)"
+ depends on INET && EXPERIMENTAL
+
+config IP_DCCP
+ tristate "The DCCP Protocol (EXPERIMENTAL)"
+ ---help---
+ Datagram Congestion Control Protocol
+
+ From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+
+ The Datagram Congestion Control Protocol (DCCP) is a transport
+ protocol that implements bidirectional, unicast connections of
+ congestion-controlled, unreliable datagrams. It should be suitable
+ for use by applications such as streaming media, Internet telephony,
+ and on-line games.
+
+ To compile this protocol support as a module, choose M here: the
+ module will be called dccp.
+
+ If in doubt, say N.
+
+config INET_DCCP_DIAG
+ depends on IP_DCCP && INET_DIAG
+ def_tristate y if (IP_DCCP = y && INET_DIAG = y)
+ def_tristate m
+
+source "net/dccp/ccids/Kconfig"
+
+menu "DCCP Kernel Hacking"
+ depends on IP_DCCP && DEBUG_KERNEL=y
+
+config IP_DCCP_DEBUG
+ bool "DCCP debug messages"
+ ---help---
+ Only use this if you're hacking DCCP.
+
+ Just say N.
+
+config IP_DCCP_UNLOAD_HACK
+ depends on IP_DCCP=m && IP_DCCP_CCID3=m
+ bool "DCCP control sock unload hack"
+ ---help---
+ Enable this to be able to unload the dccp module when it
+ has only one refcount held, the control sock one. Just execute
+ "rmmod dccp_ccid3 dccp"
+
+ Just say N.
+endmenu
+
+endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644
index 00000000000..fb97bb04245
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_IP_DCCP) += dccp.o
+
+dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
+ timer.o
+
+obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+
+dccp_diag-y := diag.o
+
+obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644
index 00000000000..9d8fc0e289e
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
+/*
+ * net/dccp/ccid.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * CCID infrastructure
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ccid.h"
+
+static struct ccid *ccids[CCID_MAX];
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+static atomic_t ccids_lockct = ATOMIC_INIT(0); /* number of readers currently inside a read-side section */
+static DEFINE_SPINLOCK(ccids_lock); /* held by a writer while it modifies ccids[] */
+
+/*
+ * The strategy is: modifications to the ccids vector are short, do not sleep
+ * and are very rare, but read access should be free of any exclusive locks.
+ *
+ * Readers announce themselves by incrementing ccids_lockct and then call
+ * spin_unlock_wait() on ccids_lock. A writer takes ccids_lock and, as long
+ * as ccids_lockct is non-zero, drops the lock, yields and retries, so it
+ * only returns (with ccids_lock held) once no reader is counted.
+ */
+static void ccids_write_lock(void)
+{
+ spin_lock(&ccids_lock);
+ while (atomic_read(&ccids_lockct) != 0) {
+ spin_unlock(&ccids_lock); /* back off so the lock is not held while readers are counted */
+ yield();
+ spin_lock(&ccids_lock);
+ }
+}
+
+static inline void ccids_write_unlock(void)
+{
+ spin_unlock(&ccids_lock); /* pairs with the spin_lock() left held by ccids_write_lock() */
+}
+
+static inline void ccids_read_lock(void)
+{
+ atomic_inc(&ccids_lockct); /* makes ccids_write_lock() back off from now on */
+ spin_unlock_wait(&ccids_lock); /* presumably spins until no writer holds ccids_lock */
+}
+
+static inline void ccids_read_unlock(void)
+{
+ atomic_dec(&ccids_lockct); /* pairs with the atomic_inc() in ccids_read_lock() */
+}
+
+#else /* !CONFIG_SMP && !CONFIG_PREEMPT: all four operations expand to empty statements */
+#define ccids_write_lock() do { } while(0)
+#define ccids_write_unlock() do { } while(0)
+#define ccids_read_lock() do { } while(0)
+#define ccids_read_unlock() do { } while(0)
+#endif /* CONFIG_SMP || CONFIG_PREEMPT */
+
+/*
+ * ccid_register - make a CCID available to ccid_init()
+ * @ccid: descriptor to install; ccid->ccid_init must be set
+ *
+ * Stores @ccid in ccids[] at index ccid->ccid_id under the write lock.
+ *
+ * Returns 0 on success, -EINVAL if the descriptor has no ccid_init hook or
+ * its id does not fit the table, -EEXIST if the slot is already taken.
+ */
+int ccid_register(struct ccid *ccid)
+{
+	int err;
+
+	if (ccid->ccid_init == NULL)
+		return -EINVAL;
+
+	/*
+	 * ccid_id is an unsigned char (0..255) but ccids[] only has
+	 * CCID_MAX entries, so the largest id would index past the end.
+	 */
+	if (ccid->ccid_id >= CCID_MAX)
+		return -EINVAL;
+
+	ccids_write_lock();
+	err = -EEXIST;
+	if (ccids[ccid->ccid_id] == NULL) {
+		ccids[ccid->ccid_id] = ccid;
+		err = 0;
+	}
+	ccids_write_unlock();
+	if (err == 0)
+		pr_info("CCID: Registered CCID %d (%s)\n",
+			ccid->ccid_id, ccid->ccid_name);
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(ccid_register);
+
+/*
+ * ccid_unregister - remove a CCID from the table
+ * @ccid: descriptor previously passed to ccid_register()
+ *
+ * Clears the ccids[] slot for ccid->ccid_id, but only when that slot
+ * actually holds @ccid, so a descriptor whose registration failed with
+ * -EEXIST cannot knock out the CCID that owns the slot.
+ *
+ * Returns -EINVAL if the id does not fit the table, 0 otherwise.
+ */
+int ccid_unregister(struct ccid *ccid)
+{
+	int removed = 0;
+
+	/* ccid_id is an unsigned char; CCID_MAX itself is not a valid index */
+	if (ccid->ccid_id >= CCID_MAX)
+		return -EINVAL;
+
+	ccids_write_lock();
+	if (ccids[ccid->ccid_id] == ccid) {
+		ccids[ccid->ccid_id] = NULL;
+		removed = 1;
+	}
+	ccids_write_unlock();
+	if (removed)
+		pr_info("CCID: Unregistered CCID %d (%s)\n",
+			ccid->ccid_id, ccid->ccid_name);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ccid_unregister);
+
+/*
+ * ccid_init - look up a CCID by id and initialise it for a socket
+ * @id: CCID number
+ * @sk: socket the CCID is being attached to
+ *
+ * With CONFIG_KMOD, an unregistered id first triggers a request for the
+ * module "net-dccp-ccid-<id>". The lookup itself runs under the read lock:
+ * a reference on the owning module is taken and the CCID's ccid_init hook
+ * is called for @sk.
+ *
+ * Returns the CCID descriptor, or NULL when the id is out of range, no such
+ * CCID is registered, the module reference cannot be taken, or the hook
+ * fails (in which case the module reference is dropped again).
+ */
+struct ccid *ccid_init(unsigned char id, struct sock *sk)
+{
+	struct ccid *ccid;
+
+	/* id can be 255, which is one past the last element of ccids[] */
+	if (id >= CCID_MAX)
+		return NULL;
+
+#ifdef CONFIG_KMOD
+	if (ccids[id] == NULL)
+		request_module("net-dccp-ccid-%d", id);
+#endif
+	ccids_read_lock();
+
+	ccid = ccids[id];
+	if (ccid == NULL)
+		goto out;
+
+	if (!try_module_get(ccid->ccid_owner))
+		goto out_err;
+
+	if (ccid->ccid_init(sk) != 0)
+		goto out_module_put;
+out:
+	ccids_read_unlock();
+	return ccid;
+out_module_put:
+	module_put(ccid->ccid_owner);
+out_err:
+	ccid = NULL;
+	goto out;
+}
+
+EXPORT_SYMBOL_GPL(ccid_init);
+
+/*
+ * ccid_exit - detach a CCID from a socket
+ * @ccid: descriptor returned by ccid_init(), may be NULL
+ * @sk: socket the CCID was attached to
+ *
+ * If the CCID is still present in ccids[], runs its optional ccid_exit hook
+ * and drops the module reference. Does nothing for a NULL @ccid or when the
+ * table slot is empty.
+ */
+void ccid_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid == NULL)
+		return;
+
+	ccids_read_lock();
+
+	if (ccids[ccid->ccid_id] == NULL)
+		goto unlock;
+
+	if (ccid->ccid_exit != NULL)
+		ccid->ccid_exit(sk);
+	module_put(ccid->ccid_owner);
+
+unlock:
+	ccids_read_unlock();
+}
+
+EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644
index 00000000000..962f1e9e2f7
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,180 @@
+#ifndef _CCID_H
+#define _CCID_H
+/*
+ * net/dccp/ccid.h
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * CCID infrastructure
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <linux/dccp.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
+#define CCID_MAX 255
+
+struct ccid { /* descriptor of one CCID: identity plus its hooks */
+ unsigned char ccid_id; /* index of this CCID in the ccids[] table (ccid.c) */
+ const char *ccid_name; /* printed in the register/unregister messages */
+ struct module *ccid_owner; /* module whose refcount ccid_init()/ccid_exit() take and drop */
+ int (*ccid_init)(struct sock *sk); /* mandatory: ccid_register() rejects a NULL hook */
+ void (*ccid_exit)(struct sock *sk); /* optional: ccid_exit() checks for NULL */
+ int (*ccid_hc_rx_init)(struct sock *sk); /* this and all hooks below are optional: the inline wrappers check for NULL */
+ int (*ccid_hc_tx_init)(struct sock *sk);
+ void (*ccid_hc_rx_exit)(struct sock *sk);
+ void (*ccid_hc_tx_exit)(struct sock *sk);
+ void (*ccid_hc_rx_packet_recv)(struct sock *sk,
+ struct sk_buff *skb);
+ int (*ccid_hc_rx_parse_options)(struct sock *sk,
+ unsigned char option,
+ unsigned char len, u16 idx,
+ unsigned char* value);
+ void (*ccid_hc_rx_insert_options)(struct sock *sk,
+ struct sk_buff *skb);
+ void (*ccid_hc_tx_insert_options)(struct sock *sk,
+ struct sk_buff *skb);
+ void (*ccid_hc_tx_packet_recv)(struct sock *sk,
+ struct sk_buff *skb);
+ int (*ccid_hc_tx_parse_options)(struct sock *sk,
+ unsigned char option,
+ unsigned char len, u16 idx,
+ unsigned char* value);
+ int (*ccid_hc_tx_send_packet)(struct sock *sk,
+ struct sk_buff *skb, int len);
+ void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
+ int len);
+ void (*ccid_hc_rx_get_info)(struct sock *sk,
+ struct tcp_info *info);
+ void (*ccid_hc_tx_get_info)(struct sock *sk,
+ struct tcp_info *info);
+};
+
+extern int ccid_register(struct ccid *ccid);
+extern int ccid_unregister(struct ccid *ccid);
+
+extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
+extern void ccid_exit(struct ccid *ccid, struct sock *sk);
+
+static inline void __ccid_get(struct ccid *ccid)
+{
+ __module_get(ccid->ccid_owner); /* forwards to __module_get() on the module that owns this CCID */
+}
+
+/*
+ * Forward to the CCID's ccid_hc_tx_send_packet hook and return its result;
+ * returns 0 when the CCID does not provide the hook.
+ */
+static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
+					 struct sk_buff *skb, int len)
+{
+	if (ccid->ccid_hc_tx_send_packet == NULL)
+		return 0;
+
+	return ccid->ccid_hc_tx_send_packet(sk, skb, len);
+}
+
+/* Forward to the CCID's ccid_hc_tx_packet_sent hook, if it has one. */
+static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
+					  int more, int len)
+{
+	if (ccid->ccid_hc_tx_packet_sent == NULL)
+		return;
+
+	ccid->ccid_hc_tx_packet_sent(sk, more, len);
+}
+
+/*
+ * Run the CCID's ccid_hc_rx_init hook and return its result; returns 0 when
+ * the CCID does not provide the hook.
+ */
+static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_rx_init == NULL)
+		return 0;
+
+	return ccid->ccid_hc_rx_init(sk);
+}
+
+/*
+ * Run the CCID's ccid_hc_tx_init hook and return its result; returns 0 when
+ * the CCID does not provide the hook.
+ */
+static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_tx_init == NULL)
+		return 0;
+
+	return ccid->ccid_hc_tx_init(sk);
+}
+
+/*
+ * Run the CCID's ccid_hc_rx_exit hook, but only when the hook exists and the
+ * socket still has RX private data (dccps_hc_rx_ccid_private) attached.
+ */
+static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_rx_exit == NULL ||
+	    dccp_sk(sk)->dccps_hc_rx_ccid_private == NULL)
+		return;
+
+	ccid->ccid_hc_rx_exit(sk);
+}
+
+/*
+ * Run the CCID's ccid_hc_tx_exit hook, but only when the hook exists and the
+ * socket still has TX private data (dccps_hc_tx_ccid_private) attached.
+ */
+static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_tx_exit == NULL ||
+	    dccp_sk(sk)->dccps_hc_tx_ccid_private == NULL)
+		return;
+
+	ccid->ccid_hc_tx_exit(sk);
+}
+
+/* Hand a received skb to the CCID's ccid_hc_rx_packet_recv hook, if any. */
+static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
+					  struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_rx_packet_recv == NULL)
+		return;
+
+	ccid->ccid_hc_rx_packet_recv(sk, skb);
+}
+
+/* Hand a received skb to the CCID's ccid_hc_tx_packet_recv hook, if any. */
+static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
+					  struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_tx_packet_recv == NULL)
+		return;
+
+	ccid->ccid_hc_tx_packet_recv(sk, skb);
+}
+
+/*
+ * Pass one option (type, length, index and value pointer) to the CCID's
+ * ccid_hc_tx_parse_options hook and return its result; returns 0 when the
+ * CCID does not provide the hook.
+ */
+static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
+{
+	if (ccid->ccid_hc_tx_parse_options == NULL)
+		return 0;
+
+	return ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
+}
+
+/*
+ * Pass one option (type, length, index and value pointer) to the CCID's
+ * ccid_hc_rx_parse_options hook and return its result; returns 0 when the
+ * CCID does not provide the hook.
+ */
+static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
+{
+	if (ccid->ccid_hc_rx_parse_options == NULL)
+		return 0;
+
+	return ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+}
+
+/* Call the CCID's ccid_hc_tx_insert_options hook for @skb, if it has one. */
+static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
+					     struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_tx_insert_options == NULL)
+		return;
+
+	ccid->ccid_hc_tx_insert_options(sk, skb);
+}
+
+/* Call the CCID's ccid_hc_rx_insert_options hook for @skb, if it has one. */
+static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
+					     struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_rx_insert_options == NULL)
+		return;
+
+	ccid->ccid_hc_rx_insert_options(sk, skb);
+}
+
+/* Let the CCID fill @info through its ccid_hc_rx_get_info hook, if any. */
+static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
+				       struct tcp_info *info)
+{
+	if (ccid->ccid_hc_rx_get_info == NULL)
+		return;
+
+	ccid->ccid_hc_rx_get_info(sk, info);
+}
+
+/* Let the CCID fill @info through its ccid_hc_tx_get_info hook, if any. */
+static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
+				       struct tcp_info *info)
+{
+	if (ccid->ccid_hc_tx_get_info == NULL)
+		return;
+
+	ccid->ccid_hc_tx_get_info(sk, info);
+}
+#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644
index 00000000000..7684d83946a
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,29 @@
+menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
+ depends on IP_DCCP && EXPERIMENTAL
+
+config IP_DCCP_CCID3
+ tristate "CCID3 (TFRC) (EXPERIMENTAL)"
+ depends on IP_DCCP
+ ---help---
+ CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+ rate-controlled congestion control mechanism. TFRC is designed to
+ be reasonably fair when competing for bandwidth with TCP-like flows,
+ where a flow is "reasonably fair" if its sending rate is generally
+ within a factor of two of the sending rate of a TCP flow under the
+ same conditions. However, TFRC has a much lower variation of
+ throughput over time compared with TCP, which makes CCID 3 more
+ suitable than CCID 2 for applications such as streaming media where a
+ relatively smooth sending rate is of importance.
+
+ CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
+ congestion control algorithms were initially described in RFC 3448.
+
+ This text was extracted from draft-ietf-dccp-spec-11.txt.
+
+ If in doubt, say M.
+
+config IP_DCCP_TFRC_LIB
+ depends on IP_DCCP_CCID3
+ def_tristate IP_DCCP_CCID3
+
+endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644
index 00000000000..956f79f5074
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
+
+dccp_ccid3-y := ccid3.o
+
+obj-y += lib/
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644
index 00000000000..7bf3b3a91e9
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,1221 @@
+/*
+ * net/dccp/ccids/ccid3.c
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ * An implementation of the DCCP protocol
+ *
+ * This code has been developed by the University of Waikato WAND
+ * research group. For further information please see http://www.wand.net.nz/
+ *
+ * This code also uses code from Lulea University, rereleased as GPL by its
+ * authors:
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ * and to make it work as a loadable module in the DCCP stack written by
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include "../ccid.h"
+#include "../dccp.h"
+#include "lib/packet_history.h"
+#include "lib/loss_interval.h"
+#include "lib/tfrc.h"
+#include "ccid3.h"
+
+/*
+ * usecs_div - approximate a * USEC_PER_SEC / b in 32-bit arithmetic
+ *
+ * Both operands are scaled by a common power of ten (div) so that the
+ * intermediate product a * (USEC_PER_SEC / div) fits in a u32. The scale
+ * starts at 10 and grows only while the product would wrap, so inputs the
+ * fixed scale of 10 could handle (a <= 42949) give exactly the same result
+ * as before; larger values of a no longer overflow.
+ *
+ * When b is not larger than 2 * div (this includes b == 0) the scaled
+ * numerator is returned undivided, as it always was for b <= 20.
+ */
+static inline u32 usecs_div(const u32 a, const u32 b)
+{
+	u32 div = 10;
+	u32 tmp;
+
+	/* Pick the smallest power-of-ten scale that keeps the product in range */
+	while (div < USEC_PER_SEC && a > (~(u32)0) / (USEC_PER_SEC / div))
+		div *= 10;
+
+	tmp = a * (USEC_PER_SEC / div);
+	return b > 2 * div ? tmp / (b / div) : tmp;
+}
+
+static int ccid3_debug;
+
+#ifdef CCID3_DEBUG
+#define ccid3_pr_debug(format, a...) \
+ do { if (ccid3_debug) \
+ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
+ } while (0)
+#else
+#define ccid3_pr_debug(format, a...)
+#endif
+
+static struct dccp_tx_hist *ccid3_tx_hist;
+static struct dccp_rx_hist *ccid3_rx_hist;
+static struct dccp_li_hist *ccid3_li_hist;
+
+static int ccid3_init(struct sock *sk)
+{
+ ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); /* expands to nothing unless CCID3_DEBUG is defined */
+ return 0; /* no per-socket setup is done here; always reports success */
+}
+
+static void ccid3_exit(struct sock *sk)
+{
+ ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); /* debug trace only; nothing is released here */
+}
+
+/* TFRC sender states (stored in ccid3hctx_state; numbering starts at 1) */
+enum ccid3_hc_tx_states {
+ TFRC_SSTATE_NO_SENT = 1, /* initial state set by ccid3_hc_tx_init() */
+ TFRC_SSTATE_NO_FBACK, /* entered by ccid3_hc_tx_send_packet() on the first packet */
+ TFRC_SSTATE_FBACK, /* entered by ccid3_hc_tx_packet_recv() on the first feedback */
+ TFRC_SSTATE_TERM, /* entered by ccid3_hc_tx_exit() */
+};
+
+#ifdef CCID3_DEBUG
+/* Translate a TX state into a short label for debug output. */
+static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
+{
+	static char *tx_state_labels[] = {
+		[TFRC_SSTATE_TERM]	= "TERM",
+		[TFRC_SSTATE_FBACK]	= "FBACK",
+		[TFRC_SSTATE_NO_FBACK]	= "NO_FBACK",
+		[TFRC_SSTATE_NO_SENT]	= "NO_SENT",
+	};
+	const char *label = tx_state_labels[state];
+
+	return label;
+}
+#endif
+
+/*
+ * Move the TX half connection of @sk to @state, logging the transition.
+ * Warns if the state does not actually change.
+ */
+static inline void ccid3_hc_tx_set_state(struct sock *sk,
+					 enum ccid3_hc_tx_states state)
+{
+	struct ccid3_hc_tx_sock *tx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+	enum ccid3_hc_tx_states prev = tx->ccid3hctx_state;
+
+	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+		       dccp_role(sk), sk, ccid3_tx_state_name(prev),
+		       ccid3_tx_state_name(state));
+	WARN_ON(state == prev);
+
+	tx->ccid3hctx_state = state;
+}
+
+/*
+ * Recompute the inter-packet interval: t_ipi = usecs_div(s, X).
+ *
+ * In state NO_FBACK the interval is left untouched; it is set when the
+ * first packet is sent (see ccid3_hc_tx_send_packet()).
+ */
+static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+{
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
+		return;
+
+	hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
+					  hctx->ccid3hctx_x);
+}
+
+/* Recompute delta = min(t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN). */
+static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
+{
+	const u32 new_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
+				    TFRC_OPSYS_HALF_TIME_GRAN);
+
+	hctx->ccid3hctx_delta = new_delta;
+}
+
+/*
+ * Recompute the allowed sending rate X of the TX half connection.
+ *
+ * If p >= TFRC_SMALLEST_P:
+ *	X_calc = tfrc_calc_x(s, R, p);
+ *	X = max(min(X_calc, 2 * X_recv), s / TFRC_MAX_BACK_OFF_TIME);
+ * Else, if at least R has elapsed since t_ld:
+ *	X = max(2 * min(X_recv, X), usecs_div(s, R));
+ *	t_ld = now;
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk)
+{
+	struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+	struct timeval now;
+
+	/* The equation is only used for p large enough to avoid big errors */
+	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
+		u32 x_limited;
+
+		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
+						     hctx->ccid3hctx_rtt,
+						     hctx->ccid3hctx_p);
+		x_limited = min_t(u32, hctx->ccid3hctx_x_calc,
+				  2 * hctx->ccid3hctx_x_recv);
+		hctx->ccid3hctx_x = max_t(u32, x_limited,
+					  (hctx->ccid3hctx_s /
+					   TFRC_MAX_BACK_OFF_TIME));
+		return;
+	}
+
+	do_gettimeofday(&now);
+	if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) < hctx->ccid3hctx_rtt)
+		return;
+
+	hctx->ccid3hctx_x = max_t(u32,
+				  min_t(u32, hctx->ccid3hctx_x_recv,
+					hctx->ccid3hctx_x) * 2,
+				  usecs_div(hctx->ccid3hctx_s,
+					    hctx->ccid3hctx_rtt));
+	hctx->ccid3hctx_t_ld = now;
+}
+
+/*
+ * Handler of the TX no-feedback timer; @data is the socket pointer.
+ *
+ * If the socket is owned by user context the timer is simply re-armed
+ * HZ / 5 jiffies later. Otherwise the sending rate is reduced according to
+ * the current TX state and the timer is re-armed with the newly computed
+ * timeout. Always drops one socket reference before returning.
+ */
+static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+	unsigned long next_tmout = 0;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Try again later. */
+		/* XXX: set some sensible MIB */
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + HZ / 5);
+		goto out;
+	}
+
+	ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+		       ccid3_tx_state_name(hctx->ccid3hctx_state));
+
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+		goto out;
+	} else if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+		/* Halve send rate, but keep it at s / t_mbi or above */
+		hctx->ccid3hctx_x /= 2;
+		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
+					 TFRC_MAX_BACK_OFF_TIME))
+			hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
+					     TFRC_MAX_BACK_OFF_TIME);
+
+		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
+			       "bytes/s\n",
+			       dccp_role(sk), sk,
+			       ccid3_tx_state_name(hctx->ccid3hctx_state),
+			       hctx->ccid3hctx_x);
+		/*
+		 * FIXME - not sure the calculation below is correct. See
+		 * section 5 of CCID3 11 should adjust tx_t_ipi and double
+		 * that to achieve it really
+		 */
+		next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
+						      hctx->ccid3hctx_x),
+				   TFRC_INITIAL_TIMEOUT);
+	} else if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK) {
+		/*
+		 * Check if IDLE since last timeout and recv rate is less than
+		 * 4 packets per RTT
+		 */
+		if (!hctx->ccid3hctx_idle ||
+		    (hctx->ccid3hctx_x_recv >=
+		     4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
+			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
+				       dccp_role(sk), sk,
+				       ccid3_tx_state_name(hctx->ccid3hctx_state));
+			/*
+			 * Halve sending rate:
+			 *
+			 * If (X_calc > 2 * X_recv)
+			 *	X_recv = max(X_recv / 2, s / (2 * t_mbi));
+			 * Else
+			 *	X_recv = X_calc / 4;
+			 */
+			BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
+			       hctx->ccid3hctx_x_calc == 0);
+
+			/* check also if p is zero -> x_calc is infinity? */
+			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
+			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
+				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
+							       hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
+			else
+				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
+
+			/* Update sending rate */
+			ccid3_hc_tx_update_x(sk);
+		}
+		/*
+		 * Schedule no feedback timer to expire in
+		 * max(t_rto, 2 * s / X)
+		 */
+		next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
+				   2 * usecs_div(hctx->ccid3hctx_s,
+						 hctx->ccid3hctx_x));
+	} else {
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		goto out;
+	}
+
+	/* Re-arm at least one jiffy ahead and mark the sender idle */
+	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+		       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+	hctx->ccid3hctx_idle = 1;
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ * Ask CCID3 whether a packet of @len bytes may be sent now.
+ *
+ * Returns 0 if the packet may go out immediately (its window counter is
+ * then stored in the skb and in the head history entry), a positive delay
+ * in milliseconds if the caller has to wait, or a negative errno:
+ * -ENOTCONN when there is no TX state, @len is 0 or the sender is
+ * terminating, -ENOBUFS when no history entry can be allocated, and
+ * -EINVAL for an unknown sender state.
+ */
+static int ccid3_hc_tx_send_packet(struct sock *sk,
+				   struct sk_buff *skb, int len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct dccp_tx_hist_entry *entry;
+	struct timeval now;
+	long delay;
+	int rc = -ENOTCONN;
+
+	/* Check if pure ACK or Terminating */
+
+	/*
+	 * XXX: We only call this function for DATA and DATAACK, on, these
+	 * packets can have zero length, but why the comment about "pure ACK"?
+	 */
+	if (hctx == NULL || len == 0 ||
+	    hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
+		goto out;
+
+	/* Reuse the head entry if it was allocated but never sent */
+	entry = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+	if (entry == NULL || entry->dccphtx_sent) {
+		entry = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC);
+
+		rc = -ENOBUFS;
+		if (entry == NULL) {
+			ccid3_pr_debug("%s, sk=%p, not enough mem to add "
+				       "to history, send refused\n",
+				       dccp_role(sk), sk);
+			goto out;
+		}
+
+		dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, entry);
+	}
+
+	do_gettimeofday(&now);
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n",
+			       dccp_role(sk), sk, dp->dccps_gss);
+
+		/* Arm the no-feedback timer for the very first packet */
+		hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+		hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+		hctx->ccid3hctx_last_win_count = 0;
+		hctx->ccid3hctx_t_last_win_count = now;
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+		hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
+
+		/* Set nominal send time for initial packet */
+		hctx->ccid3hctx_t_nom = now;
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_delta(hctx);
+		rc = 0;
+		break;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
+			 hctx->ccid3hctx_delta);
+		ccid3_pr_debug("send_packet delay=%ld\n", delay);
+		/* divide by -1000 is to convert to ms and get sign right */
+		delay /= -1000;
+		if (delay > 0)
+			rc = delay;
+		else
+			rc = 0;
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		rc = -EINVAL;
+		break;
+	}
+
+	/* Can we send? if so add options and add to packet history */
+	if (rc == 0) {
+		DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+		entry->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval;
+	}
+out:
+	return rc;
+}
+
+/*
+ * Bookkeeping after a packet has left the socket.
+ *
+ * For a data packet (@len > 0) the head history entry is stamped with the
+ * send time, sequence number and current RTT and marked as sent, and the
+ * window counter is advanced. In states NO_FBACK and FBACK the nominal
+ * send time of the next packet is then set to now + t_ipi. @more is not
+ * used.
+ */
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct timeval now;
+
+	BUG_ON(hctx == NULL);
+
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+		ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
+			       dccp_role(sk), sk);
+		return;
+	}
+
+	do_gettimeofday(&now);
+
+	/* check if we have sent a data packet */
+	if (len <= 0) {
+		ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
+			       dccp_role(sk), sk, dp->dccps_gss);
+	} else {
+		unsigned long win_ticks;
+		struct dccp_tx_hist_entry *head;
+
+		head = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+		if (head == NULL) {
+			printk(KERN_CRIT "%s: packet doesn't exists in "
+					 "history!\n", __FUNCTION__);
+			return;
+		}
+		if (head->dccphtx_sent) {
+			printk(KERN_CRIT "%s: no unsent packet in history!\n",
+			       __FUNCTION__);
+			return;
+		}
+		head->dccphtx_tstamp = now;
+		head->dccphtx_seqno = dp->dccps_gss;
+		/*
+		 * Check if win_count has changed
+		 * Algorithm in "8.1. Window Counter Value" in
+		 * draft-ietf-dccp-ccid3-11.txt
+		 */
+		win_ticks = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
+		if (likely(hctx->ccid3hctx_rtt > 8))
+			win_ticks /= hctx->ccid3hctx_rtt / 4;
+
+		if (win_ticks > 0) {
+			const unsigned long step = min_t(unsigned long,
+							 win_ticks, 5);
+
+			hctx->ccid3hctx_t_last_win_count = now;
+			hctx->ccid3hctx_last_win_count =
+				(hctx->ccid3hctx_last_win_count + step) % 16;
+			ccid3_pr_debug("%s, sk=%p, window changed from "
+				       "%u to %u!\n",
+				       dccp_role(sk), sk,
+				       head->dccphtx_ccval,
+				       hctx->ccid3hctx_last_win_count);
+		}
+
+		hctx->ccid3hctx_idle = 0;
+		head->dccphtx_rtt = hctx->ccid3hctx_rtt;
+		head->dccphtx_sent = 1;
+	}
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		/* if first wasn't pure ack */
+		if (len != 0)
+			printk(KERN_CRIT "%s: %s, First packet sent is noted "
+					 "as a data packet\n",
+			       __FUNCTION__, dccp_role(sk));
+		return;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		if (len <= 0)
+			break;
+		/* Next nominal send time is now + (recomputed) t_ipi */
+		hctx->ccid3hctx_t_nom = now;
+		ccid3_calc_new_t_ipi(hctx);
+		ccid3_calc_new_delta(hctx);
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		break;
+	}
+}
+
+/*
+ * Process feedback arriving at the TX half connection.
+ *
+ * Only ACK and DATAACK packets are considered, and only in states NO_FBACK
+ * and FBACK. Using the history entry of the acknowledged packet, the RTT
+ * estimate, t_rto, X_recv and p are updated from the options stored by
+ * ccid3_hc_tx_parse_options(); then the sending rate, t_ipi, t_nom and
+ * delta are recomputed, history older than the acked packet is purged,
+ * writers are woken up and the no-feedback timer is re-armed.
+ */
+static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_options_received *opt_recv;
+	struct dccp_tx_hist_entry *packet;
+	unsigned long next_tmout;
+	u32 t_elapsed;
+	u32 pinv;
+	u32 x_recv;
+	u32 r_sample;
+
+	if (hctx == NULL)
+		return;
+
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+		ccid3_pr_debug("%s, sk=%p, received a packet when "
+			       "terminating!\n", dccp_role(sk), sk);
+		return;
+	}
+
+	/* we are only interested in ACKs */
+	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
+	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
+		return;
+
+	opt_recv = &hctx->ccid3hctx_options_received;
+
+	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
+	x_recv = opt_recv->ccid3or_receive_rate;
+	pinv = opt_recv->ccid3or_loss_event_rate;
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		/* FIXME: what to do here? */
+		return;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		/*
+		 * Calculate new round trip sample by
+		 * R_sample = (now - t_recvdata) - t_delay
+		 */
+		/* get t_recvdata from history */
+		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
+						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		if (packet == NULL) {
+			ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't "
+				       "exist in history!\n",
+				       dccp_role(sk), sk,
+				       DCCP_SKB_CB(skb)->dccpd_ack_seq,
+				       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+			return;
+		}
+
+		/* Update RTT */
+		r_sample = timeval_now_delta(&packet->dccphtx_tstamp);
+		/*
+		 * FIXME: t_delay (t_elapsed, from the elapsed time option) is
+		 * read above but not yet subtracted from the sample.
+		 */
+
+		/*
+		 * Update RTT estimate by
+		 * If (No feedback recv)
+		 *    R = R_sample;
+		 * Else
+		 *    R = q * R + (1 - q) * R_sample;
+		 *
+		 * q is a constant, RFC 3448 recommends 0.9
+		 */
+		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+			hctx->ccid3hctx_rtt = r_sample;
+		} else
+			hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
+					      r_sample / 10;
+
+		/* Both values are in microseconds */
+		ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
+			       "r_sample=%uus\n", dccp_role(sk), sk,
+			       hctx->ccid3hctx_rtt, r_sample);
+
+		/* Update timeout interval: at least one second */
+		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+					      USEC_PER_SEC);
+
+		/* Update receive rate */
+		hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+
+		/*
+		 * Update loss event rate; a missing (~0) or zero option value
+		 * means p = 0
+		 */
+		if (pinv == ~0 || pinv == 0)
+			hctx->ccid3hctx_p = 0;
+		else {
+			hctx->ccid3hctx_p = 1000000 / pinv;
+
+			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
+				hctx->ccid3hctx_p = TFRC_SMALLEST_P;
+				ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
+					       dccp_role(sk), sk);
+			}
+		}
+
+		/* unschedule no feedback timer */
+		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+		/* Update sending rate */
+		ccid3_hc_tx_update_x(sk);
+
+		/*
+		 * Update next send time: take the old t_ipi out of t_nom and
+		 * put the recomputed one back in
+		 */
+		timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_t_ipi(hctx);
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_delta(hctx);
+
+		/* remove all packets older than the one acked from history */
+		dccp_tx_hist_purge_older(ccid3_tx_hist,
+					 &hctx->ccid3hctx_hist, packet);
+		/*
+		 * As we have calculated new ipi, delta, t_nom it is possible that
+		 * we now can send a packet, so wake up dccp_wait_for_ccids.
+		 */
+		sk->sk_write_space(sk);
+
+		/*
+		 * Schedule no feedback timer to expire in
+		 * max(t_rto, 2 * s / X); same form as in
+		 * ccid3_hc_tx_no_feedback_timer()
+		 */
+		next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
+				   2 * usecs_div(hctx->ccid3hctx_s,
+						 hctx->ccid3hctx_x));
+
+		ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
+			       "expire in %lu jiffies (%luus)\n",
+			       dccp_role(sk), sk,
+			       usecs_to_jiffies(next_tmout), next_tmout);
+
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+
+		/* set idle flag */
+		hctx->ccid3hctx_idle = 1;
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		break;
+	}
+}
+
+/*
+ * Copy the current window counter into the skb's control block. Nothing is
+ * done without TX state or when the socket is neither OPEN nor PARTOPEN.
+ */
+static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+	if (hctx == NULL)
+		return;
+
+	if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+		return;
+
+	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+}
+
+/*
+ * Record one CCID3 option received by the TX half connection.
+ * @option: option type
+ * @len: length of the option value in bytes
+ * @idx: index of the option (stored for TFRC_OPT_LOSS_INTERVALS)
+ * @value: pointer to the option value
+ *
+ * The values are kept in hctx->ccid3hctx_options_received and are reset
+ * whenever the first option of a packet with a new dccps_gsr is seen.
+ * Options of other types are ignored.
+ *
+ * Returns 0, or -EINVAL if a loss event rate or receive rate option does
+ * not carry exactly 4 bytes.
+ */
+static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
+				     unsigned char len, u16 idx,
+				     unsigned char *value)
+{
+	int rc = 0;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_options_received *opt_recv;
+	u32 raw;
+
+	if (hctx == NULL)
+		return 0;
+
+	opt_recv = &hctx->ccid3hctx_options_received;
+
+	/* First option of a new packet: forget what the previous one said */
+	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
+		opt_recv->ccid3or_seqno = dp->dccps_gsr;
+		opt_recv->ccid3or_loss_event_rate = ~0;
+		opt_recv->ccid3or_loss_intervals_idx = 0;
+		opt_recv->ccid3or_loss_intervals_len = 0;
+		opt_recv->ccid3or_receive_rate = 0;
+	}
+
+	switch (option) {
+	case TFRC_OPT_LOSS_EVENT_RATE:
+		if (len != 4) {
+			ccid3_pr_debug("%s, sk=%p, invalid len for "
+				       "TFRC_OPT_LOSS_EVENT_RATE\n",
+				       dccp_role(sk), sk);
+			rc = -EINVAL;
+		} else {
+			/*
+			 * value is a byte pointer with no alignment
+			 * guarantee, so copy the 4 bytes out instead of
+			 * dereferencing it as a u32
+			 */
+			memcpy(&raw, value, sizeof(raw));
+			opt_recv->ccid3or_loss_event_rate = ntohl(raw);
+			ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
+				       dccp_role(sk), sk,
+				       opt_recv->ccid3or_loss_event_rate);
+		}
+		break;
+	case TFRC_OPT_LOSS_INTERVALS:
+		opt_recv->ccid3or_loss_intervals_idx = idx;
+		opt_recv->ccid3or_loss_intervals_len = len;
+		ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
+			       dccp_role(sk), sk,
+			       opt_recv->ccid3or_loss_intervals_idx,
+			       opt_recv->ccid3or_loss_intervals_len);
+		break;
+	case TFRC_OPT_RECEIVE_RATE:
+		if (len != 4) {
+			ccid3_pr_debug("%s, sk=%p, invalid len for "
+				       "TFRC_OPT_RECEIVE_RATE\n",
+				       dccp_role(sk), sk);
+			rc = -EINVAL;
+		} else {
+			/* same unaligned-safe read as for the loss event rate */
+			memcpy(&raw, value, sizeof(raw));
+			opt_recv->ccid3or_receive_rate = ntohl(raw);
+			ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
+				       dccp_role(sk), sk,
+				       opt_recv->ccid3or_receive_rate);
+		}
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Allocate and initialise the CCID3 sender state for a socket.
+ * Returns 0 on success or -ENOMEM when the state cannot be allocated
+ * (the socket's private pointer is then left NULL).
+ */
+static int ccid3_hc_tx_init(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	hctx = kmalloc(sizeof(*hctx), gfp_any());
+	dp->dccps_hc_tx_ccid_private = hctx;
+	if (hctx == NULL)
+		return -ENOMEM;
+
+	memset(hctx, 0, sizeof(*hctx));
+
+	/* Use the socket's packet size only when it is within TFRC limits */
+	hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
+	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
+	    dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
+		hctx->ccid3hctx_s = dp->dccps_packet_size;
+
+	/* Set transmission rate to 1 packet per second */
+	hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+	hctx->ccid3hctx_t_rto = USEC_PER_SEC;
+	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
+	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
+	init_timer(&hctx->ccid3hctx_no_feedback_timer);
+
+	return 0;
+}
+
+/*
+ * Tear down the CCID3 sender state of a socket: move to the TERM state,
+ * stop the no-feedback timer, drop the TX packet history and free the
+ * private state.
+ *
+ * ccid3_hc_tx_init() leaves the private pointer NULL when its allocation
+ * fails, and every other TX hook in this file tolerates that, so a missing
+ * state is treated as "nothing to clean up" here instead of a BUG().
+ */
+static void ccid3_hc_tx_exit(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	if (hctx == NULL)
+		return;
+
+	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
+	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+	/* Empty packet history */
+	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
+
+	kfree(dp->dccps_hc_tx_ccid_private);
+	dp->dccps_hc_tx_ccid_private = NULL;
+}
+
+/*
+ * RX Half Connection methods
+ */
+
+/* TFRC receiver states */
+enum ccid3_hc_rx_states {
+ /* State installed by ccid3_hc_rx_init() */
+ TFRC_RSTATE_NO_DATA = 1,
+ /* Entered by ccid3_hc_rx_packet_recv() after the initial feedback */
+ TFRC_RSTATE_DATA,
+ /* Set by ccid3_hc_rx_exit() just before the state is freed */
+ TFRC_RSTATE_TERM = 127,
+};
+
+#ifdef CCID3_DEBUG
+/*
+ * Map a receiver state to its printable name for debug output.
+ * Returns NULL for values that have no name, including values beyond the
+ * end of the table (the state is stored in an 8-bit field, so it can
+ * exceed TFRC_RSTATE_TERM if it is ever corrupted).
+ */
+static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
+{
+	static char *ccid3_rx_state_names[] = {
+	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
+	[TFRC_RSTATE_DATA]    = "DATA",
+	[TFRC_RSTATE_TERM]    = "TERM",
+	};
+
+	if ((unsigned int)state >= ARRAY_SIZE(ccid3_rx_state_names))
+		return NULL;
+
+	return ccid3_rx_state_names[state];
+}
+#endif
+
+/*
+ * Switch the receiver half-connection to a new state, logging the
+ * transition and warning when the state does not actually change.
+ */
+static inline void ccid3_hc_rx_set_state(struct sock *sk,
+					  enum ccid3_hc_rx_states state)
+{
+	struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+	enum ccid3_hc_rx_states prev = hcrx->ccid3hcrx_state;
+
+	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+		       dccp_role(sk), sk, ccid3_rx_state_name(prev),
+		       ccid3_rx_state_name(state));
+	WARN_ON(prev == state);
+	hcrx->ccid3hcrx_state = state;
+}
+
+/*
+ * Refresh the values reported in receiver feedback and send an Ack.
+ *
+ * Computes the receive rate (bytes received since the previous feedback,
+ * scaled to bytes per second), remembers the window counter and sequence
+ * number of the first data packet found in the history, records the time
+ * elapsed since that packet (in units of 10us) and the inverse of the loss
+ * event rate, then calls dccp_send_ack().
+ */
+static void ccid3_hc_rx_send_feedback(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct dccp_rx_hist_entry *packet;
+	struct timeval now;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	do_gettimeofday(&now);
+
+	switch (hcrx->ccid3hcrx_state) {
+	case TFRC_RSTATE_NO_DATA:
+		hcrx->ccid3hcrx_x_recv = 0;
+		break;
+	case TFRC_RSTATE_DATA: {
+		const u32 delta = timeval_delta(&now,
+					&hcrx->ccid3hcrx_tstamp_last_feedback);
+		/*
+		 * Do the scaling in 64 bits: bytes_recv * USEC_PER_SEC no
+		 * longer fits in 32 bits once more than ~4294 bytes have
+		 * been counted, and storing the product in the u32 field
+		 * before dividing silently wrapped the rate.
+		 */
+		u64 x_recv = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
+
+		if (likely(delta > 1))
+			do_div(x_recv, delta);
+
+		/* Saturate rather than wrap if the rate still does not fit */
+		hcrx->ccid3hcrx_x_recv = x_recv > 0xffffffffULL ?
+						~0U : (u32)x_recv;
+	}
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+		dump_stack();
+		return;
+	}
+
+	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
+	if (packet == NULL) {
+		printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
+		       __FUNCTION__, dccp_role(sk), sk);
+		dump_stack();
+		return;
+	}
+
+	hcrx->ccid3hcrx_tstamp_last_feedback = now;
+	hcrx->ccid3hcrx_last_counter	     = packet->dccphrx_ccval;
+	hcrx->ccid3hcrx_seqno_last_counter   = packet->dccphrx_seqno;
+	hcrx->ccid3hcrx_bytes_recv	     = 0;
+
+	/* Convert to multiples of 10us */
+	hcrx->ccid3hcrx_elapsed_time =
+			timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
+	/* p == 0 means no loss seen: report the largest possible interval */
+	if (hcrx->ccid3hcrx_p == 0)
+		hcrx->ccid3hcrx_pinv = ~0;
+	else
+		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+	dccp_send_ack(sk);
+}
+
+/*
+ * Add the receiver's CCID3 information to an outgoing packet: the last
+ * window counter as CCVal and, on packets that carry an Ack, the elapsed
+ * time (when non-zero), a timestamp, the loss event rate option and the
+ * receive rate option.
+ */
+static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	u32 rate_be, pinv_be;
+
+	if (hcrx == NULL)
+		return;
+
+	if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+		return;
+
+	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+
+	if (dccp_packet_without_ack(skb))
+		return;
+
+	if (hcrx->ccid3hcrx_elapsed_time != 0)
+		dccp_insert_option_elapsed_time(sk, skb,
+						hcrx->ccid3hcrx_elapsed_time);
+	dccp_insert_option_timestamp(sk, skb);
+
+	/* Option values go out in network byte order */
+	pinv_be = htonl(hcrx->ccid3hcrx_pinv);
+	dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+			   &pinv_be, sizeof(pinv_be));
+
+	rate_be = htonl(hcrx->ccid3hcrx_x_recv);
+	dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
+			   &rate_be, sizeof(rate_be));
+}
+
+/*
+ * Calculate the first loss interval.
+ *
+ * Approximates the RTT from the timestamps and window counters of the data
+ * packets in the receive history, derives the current receive rate, and
+ * reverse-looks-up the loss rate p that would produce that rate.
+ *
+ * Returns 1000000 / p, or ~0 when no estimate can be formed (no data
+ * packets in the history, p == 0, or a receive rate too small to evaluate).
+ * NOTE(review): the original comment described the result as being "in
+ * usecs"; confirm the intended unit against the loss-interval users.
+ */
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+	u32 rtt, delta, x_recv, fval, p, tmp2;
+	struct timeval tstamp = { 0, };
+	int interval = 0;
+	int win_count = 0;
+	int step = 0;
+	u64 tmp1;
+
+	/*
+	 * Remember the first data packet (step 0), then walk on until a
+	 * data packet whose window counter is more than 4 away is found.
+	 */
+	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
+				 dccphrx_node) {
+		if (dccp_rx_hist_entry_data_packet(entry)) {
+			tail = entry;
+
+			switch (step) {
+			case 0:
+				tstamp	  = entry->dccphrx_tstamp;
+				win_count = entry->dccphrx_ccval;
+				step = 1;
+				break;
+			case 1:
+				interval = win_count - entry->dccphrx_ccval;
+				if (interval < 0)
+					interval += TFRC_WIN_COUNT_LIMIT;
+				if (interval > 4)
+					goto found;
+				break;
+			}
+		}
+	}
+
+	if (step == 0) {
+		printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no "
+				 "data packets!\n",
+		       __FUNCTION__, dccp_role(sk), sk);
+		return ~0;
+	}
+
+	if (interval == 0) {
+		ccid3_pr_debug("%s, sk=%p, Could not find a win_count "
+			       "interval > 0. Defaulting to 1\n",
+			       dccp_role(sk), sk);
+		interval = 1;
+	}
+found:
+	rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
+	ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
+		       dccp_role(sk), sk, rtt);
+	if (rtt == 0)
+		rtt = 1;
+
+	/*
+	 * Receive rate in bytes/second. Computed in 64 bits because
+	 * bytes_recv * USEC_PER_SEC does not fit in a u32 once more than
+	 * ~4294 bytes have been counted.
+	 */
+	delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback);
+	tmp1 = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
+	if (likely(delta > 1))
+		do_div(tmp1, delta);
+	x_recv = tmp1 > 0xffffffffULL ? ~0U : (u32)tmp1;
+
+	tmp1 = (u64)x_recv * (u64)rtt;
+	do_div(tmp1,10000000);
+	tmp2 = tmp1 > 0xffffffffULL ? ~0U : (u32)tmp1;
+	if (tmp2 == 0) {
+		/*
+		 * x_recv * rtt < 10^7 (e.g. nothing received yet): dividing
+		 * by it below would be a division by zero.
+		 */
+		ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s too low "
+			       "to estimate first loss interval\n",
+			       dccp_role(sk), sk, x_recv);
+		return ~0;
+	}
+
+	/* 64-bit again: s can be up to 65535, and 65535 * 100000 > 2^32 */
+	tmp1 = (u64)hcrx->ccid3hcrx_s * 100000;
+	do_div(tmp1, tmp2);
+	fval = tmp1 > 0xffffffffULL ? ~0U : (u32)tmp1;
+
+	p = tfrc_calc_x_reverse_lookup(fval);
+	ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
+		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+
+	if (p == 0)
+		return ~0;
+	else
+		return 1000000 / p;
+}
+
+/*
+ * Create the loss interval history on the first detected loss.
+ * A seq_loss of DCCP_MAX_SEQNO + 1 means "no loss"; an existing history is
+ * left untouched.
+ */
+static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+{
+	struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+	struct dccp_li_hist_entry *first;
+
+	/* FIXME: find end of interval */
+	if (seq_loss == DCCP_MAX_SEQNO + 1)
+		return;
+
+	if (!list_empty(&hcrx->ccid3hcrx_li_hist))
+		return;
+
+	first = dccp_li_hist_interval_new(ccid3_li_hist,
+					  &hcrx->ccid3hcrx_li_hist,
+					  seq_loss, win_loss);
+	if (first != NULL)
+		first->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+}
+
+/*
+ * Ask the packet history for a lost sequence number and feed the result
+ * into the loss interval history.
+ */
+static void ccid3_hc_rx_detect_loss(struct sock *sk)
+{
+	struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+	u8 win_loss;
+	u64 seq_loss;
+
+	seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
+					    &hcrx->ccid3hcrx_li_hist,
+					    &win_loss);
+	ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+}
+
+/*
+ * Receiver-side processing of an incoming packet.
+ *
+ * Only Data, DataAck and Ack packets are handled (an Ack is ignored while
+ * still in the NO_DATA state). For DataAck/Ack packets carrying a
+ * timestamp echo the RTT estimate is refreshed. The packet is then added
+ * to the receive history; for Data/DataAck packets the first one triggers
+ * the initial feedback, later ones update the byte count and send feedback
+ * at most once per RTT. When dccp_rx_hist_add_packet() reports a non-zero
+ * result, loss detection runs and feedback is sent if the loss event rate
+ * increased.
+ */
+static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	const struct dccp_options_received *opt_recv;
+	struct dccp_rx_hist_entry *packet;
+	struct timeval now;
+	u32 p_prev;
+	int ins;
+
+	if (hcrx == NULL)
+		return;
+
+	BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
+		 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+
+	opt_recv = &dp->dccps_options_received;
+
+	switch (DCCP_SKB_CB(skb)->dccpd_type) {
+	case DCCP_PKT_ACK:
+		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
+			return;
+		/* fall through - Acks share the RTT update with DataAcks */
+	case DCCP_PKT_DATAACK:
+		if (opt_recv->dccpor_timestamp_echo == 0)
+			break;
+		p_prev = hcrx->ccid3hcrx_rtt;
+		do_gettimeofday(&now);
+		hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
+				     (opt_recv->dccpor_timestamp_echo -
+				      opt_recv->dccpor_elapsed_time) * 10;
+		if (p_prev != hcrx->ccid3hcrx_rtt)
+			ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
+				       opt_recv->dccpor_elapsed_time);
+		break;
+	case DCCP_PKT_DATA:
+		break;
+	default:
+		ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
+			       dccp_role(sk), sk,
+			       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+		return;
+	}
+
+	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
+					skb, SLAB_ATOMIC);
+	if (packet == NULL) {
+		/* The terminating newline was missing from this message */
+		ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
+			       "to history (consider it lost)!\n",
+			       dccp_role(sk), sk);
+		return;
+	}
+
+	ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+				      &hcrx->ccid3hcrx_li_hist, packet);
+
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
+		return;
+
+	switch (hcrx->ccid3hcrx_state) {
+	case TFRC_RSTATE_NO_DATA:
+		ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
+			       "feedback\n",
+			       dccp_role(sk), sk,
+			       dccp_state_name(sk->sk_state), skb);
+		ccid3_hc_rx_send_feedback(sk);
+		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+		return;
+	case TFRC_RSTATE_DATA:
+		hcrx->ccid3hcrx_bytes_recv += skb->len -
+					      dccp_hdr(skb)->dccph_doff * 4;
+		if (ins != 0)
+			break;
+
+		/* Periodic feedback: at most once per RTT */
+		do_gettimeofday(&now);
+		if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
+		    hcrx->ccid3hcrx_rtt) {
+			hcrx->ccid3hcrx_tstamp_last_ack = now;
+			ccid3_hc_rx_send_feedback(sk);
+		}
+		return;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+		dump_stack();
+		return;
+	}
+
+	/* Dealing with packet loss */
+	ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
+		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
+
+	ccid3_hc_rx_detect_loss(sk);
+	p_prev = hcrx->ccid3hcrx_p;
+
+	/* Calculate loss event rate */
+	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
+		const u32 i_mean =
+			dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+
+		/*
+		 * dccp_li_hist_calc_i_mean() returns 0 when it cannot
+		 * produce a mean; keep the previous p in that case instead
+		 * of dividing by zero.
+		 */
+		if (i_mean != 0)
+			/* Scaling up by 1000000 as fixed decimal */
+			hcrx->ccid3hcrx_p = 1000000 / i_mean;
+	}
+
+	if (hcrx->ccid3hcrx_p > p_prev)
+		ccid3_hc_rx_send_feedback(sk);
+}
+
+/*
+ * Allocate and initialise the CCID3 receiver state for a socket.
+ * Returns 0 on success or -ENOMEM when the state cannot be allocated
+ * (the socket's private pointer is then left NULL).
+ */
+static int ccid3_hc_rx_init(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	hcrx = kmalloc(sizeof(*hcrx), gfp_any());
+	dp->dccps_hc_rx_ccid_private = hcrx;
+	if (hcrx == NULL)
+		return -ENOMEM;
+
+	memset(hcrx, 0, sizeof(*hcrx));
+
+	/* Use the socket's packet size only when it is within TFRC limits */
+	hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
+	    dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
+		hcrx->ccid3hcrx_s = dp->dccps_packet_size;
+
+	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
+	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
+	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
+	/*
+	 * XXX this seems to be paranoid, need to think more about this, for
+	 * now start with something different than zero. -acme
+	 */
+	hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
+	return 0;
+}
+
+/*
+ * Tear down the CCID3 receiver state of a socket, if there is any:
+ * move to the TERM state, drop both histories and free the private state.
+ */
+static void ccid3_hc_rx_exit(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	if (hcrx != NULL) {
+		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
+
+		/* Empty packet history */
+		dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
+
+		/* Empty loss interval history */
+		dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
+
+		kfree(hcrx);
+		dp->dccps_hc_rx_ccid_private = NULL;
+	}
+}
+
+/*
+ * Report receiver state through a tcp_info structure: the receiver state,
+ * the timestamps option flag and the receiver's RTT estimate.
+ */
+static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
+{
+	const struct ccid3_hc_rx_sock *hcrx =
+				dccp_sk(sk)->dccps_hc_rx_ccid_private;
+
+	if (hcrx != NULL) {
+		info->tcpi_ca_state = hcrx->ccid3hcrx_state;
+		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+		info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
+	}
+}
+
+/*
+ * Report sender state through a tcp_info structure: the t_rto value and
+ * the sender's RTT estimate.
+ */
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+	const struct ccid3_hc_tx_sock *hctx =
+				dccp_sk(sk)->dccps_hc_tx_ccid_private;
+
+	if (hctx != NULL) {
+		info->tcpi_rto = hctx->ccid3hctx_t_rto;
+		info->tcpi_rtt = hctx->ccid3hctx_rtt;
+	}
+}
+
+/*
+ * CCID3 operations table: binds the TX and RX half-connection hooks
+ * defined in this file to CCID id 3. It is passed to ccid_register() in
+ * ccid3_module_init() and to ccid_unregister() in ccid3_module_exit().
+ */
+static struct ccid ccid3 = {
+ .ccid_id = 3,
+ .ccid_name = "ccid3",
+ .ccid_owner = THIS_MODULE,
+ .ccid_init = ccid3_init,
+ .ccid_exit = ccid3_exit,
+ .ccid_hc_tx_init = ccid3_hc_tx_init,
+ .ccid_hc_tx_exit = ccid3_hc_tx_exit,
+ .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
+ .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
+ .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
+ .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
+ .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
+ .ccid_hc_rx_init = ccid3_hc_rx_init,
+ .ccid_hc_rx_exit = ccid3_hc_rx_exit,
+ .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
+ .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
+ .ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
+ .ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
+};
+
+/* Expose ccid3_debug as an int module parameter with mode 0444. */
+module_param(ccid3_debug, int, 0444);
+MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+
+/*
+ * Module entry point: create the RX, TX and loss-interval history
+ * allocators and register CCID3. Anything created so far is deleted again
+ * when a later step fails. Returns 0 on success, -ENOBUFS when an allocator
+ * cannot be created, or the error returned by ccid_register().
+ */
+static __init int ccid3_module_init(void)
+{
+	int rc = -ENOBUFS;
+
+	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
+	if (ccid3_rx_hist == NULL)
+		return rc;
+
+	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
+	if (ccid3_tx_hist == NULL)
+		goto undo_rx;
+
+	ccid3_li_hist = dccp_li_hist_new("ccid3");
+	if (ccid3_li_hist == NULL)
+		goto undo_tx;
+
+	rc = ccid_register(&ccid3);
+	if (rc == 0)
+		return 0;
+
+	/* Registration failed: unwind in reverse order of creation */
+	dccp_li_hist_delete(ccid3_li_hist);
+	ccid3_li_hist = NULL;
+undo_tx:
+	dccp_tx_hist_delete(ccid3_tx_hist);
+	ccid3_tx_hist = NULL;
+undo_rx:
+	dccp_rx_hist_delete(ccid3_rx_hist);
+	ccid3_rx_hist = NULL;
+	return rc;
+}
+module_init(ccid3_module_init);
+
+/*
+ * Module exit point: unregister CCID3 and delete whichever history
+ * allocators exist.
+ */
+static __exit void ccid3_module_exit(void)
+{
+#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
+	/*
+	 * Hack to use while developing, so that we get rid of the control
+	 * sock, that is what keeps a refcount on dccp.ko -acme
+	 */
+	extern void dccp_ctl_sock_exit(void);
+
+	dccp_ctl_sock_exit();
+#endif
+	ccid_unregister(&ccid3);
+
+	if (ccid3_tx_hist != NULL)
+		dccp_tx_hist_delete(ccid3_tx_hist);
+	ccid3_tx_hist = NULL;
+
+	if (ccid3_rx_hist != NULL)
+		dccp_rx_hist_delete(ccid3_rx_hist);
+	ccid3_rx_hist = NULL;
+
+	if (ccid3_li_hist != NULL)
+		dccp_li_hist_delete(ccid3_li_hist);
+	ccid3_li_hist = NULL;
+}
+module_exit(ccid3_module_exit);
+
+MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+ "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
+MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 00000000000..ee8cbace663
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
+/*
+ * net/dccp/ccids/ccid3.h
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ * An implementation of the DCCP protocol
+ *
+ * This code has been developed by the University of Waikato WAND
+ * research group. For further information please see http://www.wand.net.nz/
+ * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ * This code also uses code from Lulea University, rereleased as GPL by its
+ * authors:
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ * and to make it work as a loadable module in the DCCP stack written by
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _DCCP_CCID3_H_
+#define _DCCP_CCID3_H_
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+#define TFRC_MIN_PACKET_SIZE 16
+#define TFRC_STD_PACKET_SIZE 256
+#define TFRC_MAX_PACKET_SIZE 65535
+
+/* Two seconds as per CCID3 spec */
+#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
+
+/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
+#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
+
+/* In seconds */
+#define TFRC_MAX_BACK_OFF_TIME 64
+
+#define TFRC_SMALLEST_P 40
+
+/* Option type codes of the CCID3-specific options used by the TFRC code. */
+enum ccid3_options {
+ TFRC_OPT_LOSS_EVENT_RATE = 192,
+ TFRC_OPT_LOSS_INTERVALS = 193,
+ TFRC_OPT_RECEIVE_RATE = 194,
+};
+
+/*
+ * CCID3 options collected from one received packet.
+ *
+ * ccid3or_seqno               - sequence number the record belongs to
+ * ccid3or_loss_intervals_idx  - index recorded for a Loss Intervals option
+ * ccid3or_loss_intervals_len  - length recorded for a Loss Intervals option
+ * ccid3or_loss_event_rate     - value of the Loss Event Rate option
+ * ccid3or_receive_rate        - value of the Receive Rate option
+ */
+struct ccid3_options_received {
+ u64 ccid3or_seqno:48,
+ ccid3or_loss_intervals_idx:16;
+ u16 ccid3or_loss_intervals_len;
+ u32 ccid3or_loss_event_rate;
+ u32 ccid3or_receive_rate;
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
+ *
+ * @ccid3hctx_state - Sender state
+ * @ccid3hctx_x - Current sending rate
+ * @ccid3hctx_x_recv - Receive rate
+ * @ccid3hctx_x_calc - Calculated send (?) rate
+ * @ccid3hctx_s - Packet size
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
+ * with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - FIXME
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_t_rto - Timeout value (presumably in usecs, like the RTT
+ * estimate - TODO confirm)
+ * @ccid3hctx_t_ipi - Interpacket (send) interval
+ * @ccid3hctx_delta - Send timer delta
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - CCID3 options parsed from received packets
+ */
+struct ccid3_hc_tx_sock {
+ u32 ccid3hctx_x;
+ u32 ccid3hctx_x_recv;
+ u32 ccid3hctx_x_calc;
+ u16 ccid3hctx_s;
+ u32 ccid3hctx_rtt;
+ u32 ccid3hctx_p;
+ u8 ccid3hctx_state;
+ u8 ccid3hctx_last_win_count;
+ u8 ccid3hctx_idle;
+ struct timeval ccid3hctx_t_last_win_count;
+ struct timer_list ccid3hctx_no_feedback_timer;
+ struct timeval ccid3hctx_t_ld;
+ struct timeval ccid3hctx_t_nom;
+ u32 ccid3hctx_t_rto;
+ u32 ccid3hctx_t_ipi;
+ u32 ccid3hctx_delta;
+ struct list_head ccid3hctx_hist;
+ struct ccid3_options_received ccid3hctx_options_received;
+};
+
+/** struct ccid3_hc_rx_sock - CCID3 receiver half connection sock
+ *
+ * @ccid3hcrx_seqno_last_counter - Sequence number of the data packet the
+ * last feedback was based on
+ * @ccid3hcrx_state - Receiver state
+ * @ccid3hcrx_last_counter - Window counter (CCVal) of that data packet
+ * @ccid3hcrx_rtt - Receiver's estimate of the round trip time
+ * (presumably in usecs - TODO confirm)
+ * @ccid3hcrx_p - Current loss event rate scaled by 1000000
+ * @ccid3hcrx_bytes_recv - Bytes received since the last feedback
+ * @ccid3hcrx_tstamp_last_feedback - Time the last feedback was sent
+ * @ccid3hcrx_tstamp_last_ack - Time of the last periodic feedback
+ * @ccid3hcrx_hist - Packet history
+ * @ccid3hcrx_li_hist - Loss interval history
+ * @ccid3hcrx_s - Packet size
+ * @ccid3hcrx_pinv - Inverse of the loss event rate as sent in feedback
+ * @ccid3hcrx_elapsed_time - Elapsed time sent in feedback, in units of 10us
+ * @ccid3hcrx_x_recv - Receive rate sent in feedback
+ */
+struct ccid3_hc_rx_sock {
+ u64 ccid3hcrx_seqno_last_counter:48,
+ ccid3hcrx_state:8,
+ ccid3hcrx_last_counter:4;
+ unsigned long ccid3hcrx_rtt;
+ u32 ccid3hcrx_p;
+ u32 ccid3hcrx_bytes_recv;
+ struct timeval ccid3hcrx_tstamp_last_feedback;
+ struct timeval ccid3hcrx_tstamp_last_ack;
+ struct list_head ccid3hcrx_hist;
+ struct list_head ccid3hcrx_li_hist;
+ u16 ccid3hcrx_s;
+ u32 ccid3hcrx_pinv;
+ u32 ccid3hcrx_elapsed_time;
+ u32 ccid3hcrx_x_recv;
+};
+
+/*
+ * Read ccid3hctx_<field> from a dccp_sock's CCID3 sender state, or 0 when
+ * there is no sender state. The argument is parenthesised so that
+ * expressions such as "&foo" or "p + 1" expand correctly; note that it is
+ * still evaluated twice.
+ */
+#define ccid3_hc_tx_field(s,field) ((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
+	((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
+
+/*
+ * Read ccid3hcrx_<field> from a dccp_sock's CCID3 receiver state, or 0 when
+ * there is no receiver state. The argument is parenthesised so that
+ * expressions such as "&foo" or "p + 1" expand correctly; note that it is
+ * still evaluated twice.
+ */
+#define ccid3_hc_rx_field(s,field) ((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
+	((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
+
+#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
new file mode 100644
index 00000000000..5f940a6cbac
--- /dev/null
+++ b/net/dccp/ccids/lib/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
+
+dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
new file mode 100644
index 00000000000..4c01a54143a
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -0,0 +1,144 @@
+/*
+ * net/dccp/ccids/lib/loss_interval.c
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include "loss_interval.h"
+
+/*
+ * Create a loss interval history allocator backed by a slab cache named
+ * "li_hist_<name>". Returns the new allocator, or NULL when any of the
+ * allocations fails (nothing is leaked in that case).
+ */
+struct dccp_li_hist *dccp_li_hist_new(const char *name)
+{
+	static const char dccp_li_hist_mask[] = "li_hist_%s";
+	struct dccp_li_hist *hist;
+	char *slab_name;
+
+	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+	if (hist == NULL)
+		return NULL;
+
+	slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
+			    GFP_ATOMIC);
+	if (slab_name == NULL)
+		goto free_hist;
+
+	sprintf(slab_name, dccp_li_hist_mask, name);
+	hist->dccplih_slab = kmem_cache_create(slab_name,
+					     sizeof(struct dccp_li_hist_entry),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+	if (hist->dccplih_slab != NULL)
+		return hist;
+
+	kfree(slab_name);
+free_hist:
+	kfree(hist);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_new);
+
+/*
+ * Destroy a loss interval history allocator: the slab cache, the cache
+ * name string obtained from kmem_cache_name(), and the allocator itself.
+ */
+void dccp_li_hist_delete(struct dccp_li_hist *hist)
+{
+	kmem_cache_t *slab = hist->dccplih_slab;
+	const char *slab_name = kmem_cache_name(slab);
+
+	kmem_cache_destroy(slab);
+	kfree(slab_name);
+	kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
+
+/* Unlink every entry of a loss interval list and return it to the slab. */
+void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct dccp_li_hist_entry *victim =
+			list_entry(list->next, struct dccp_li_hist_entry,
+				   dccplih_node);
+
+		list_del_init(&victim->dccplih_node);
+		kmem_cache_free(hist->dccplih_slab, victim);
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
+
+/* Weights used to calculate loss event rate */
+/*
+ * These are integers as per section 8 of RFC3448. We can then divide by 4
+ * when we use them.
+ */
+static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
+ 4, 4, 4, 4, 3, 2, 1, 1,
+};
+
+/*
+ * Compute a weighted mean over the loss intervals on @list.
+ *
+ * Two weighted sums are built with the dccp_li_hist_w[] weights: i_tot0
+ * over the first DCCP_LI_HIST_IVAL_F_LENGTH entries, and i_tot1 over the
+ * entries after the first one (entry i is weighted with w[i - 1]). The
+ * larger sum, scaled by 4 and divided by the total weight, is returned.
+ *
+ * Returns 0 when the number of entries visited does not equal
+ * DCCP_LI_HIST_IVAL_F_LENGTH.
+ *
+ * NOTE(review): the loop visits up to DCCP_LI_HIST_IVAL_F_LENGTH + 1
+ * entries, and dccp_li_hist_interval_new() creates exactly that many, so
+ * for a list built there i ends at F_LENGTH + 1 and this function returns
+ * 0. Confirm whether the final check or the producer is the one that is
+ * off by one; callers must be prepared for a 0 result either way.
+ */
+u32 dccp_li_hist_calc_i_mean(struct list_head *list)
+{
+ struct dccp_li_hist_entry *li_entry, *li_next;
+ int i = 0;
+ u32 i_tot;
+ u32 i_tot0 = 0;
+ u32 i_tot1 = 0;
+ u32 w_tot = 0;
+
+ list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
+ /* Sum that starts at the first entry */
+ if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+ i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
+ w_tot += dccp_li_hist_w[i];
+ }
+
+ /* Sum that skips the first entry, weights shifted by one */
+ if (i != 0)
+ i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
+
+ if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
+ break;
+ }
+
+ if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
+ return 0;
+
+ i_tot = max(i_tot0, i_tot1);
+
+ /* FIXME: Why do we do this? -Ian McDonald */
+ if (i_tot * 4 < w_tot)
+ i_tot = w_tot * 4;
+
+ return i_tot * 4 / w_tot;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
+
+/*
+ * Populate @list with DCCP_LI_HIST_IVAL_F_LENGTH + 1 new loss interval
+ * entries.
+ *
+ * @hist:     allocator the entries come from
+ * @list:     list the entries are added to (each one at the head)
+ * @seq_loss: sequence number stored in the last entry created
+ * @win_loss: window counter stored in the last entry created
+ *
+ * Returns the first entry created (which ends up furthest from the list
+ * head), or NULL when an allocation fails; in that case the list is purged.
+ *
+ * Entries come straight from kmem_cache_alloc() on a cache created without
+ * a constructor, so every field is initialised here. Previously only the
+ * last entry's seqno/win_count were set and every dccplih_interval was
+ * left as uninitialised memory, although dccp_li_hist_calc_i_mean() sums
+ * the interval of every entry. Zero is used as the neutral value.
+ * NOTE(review): confirm zero is the intended "no interval yet" marker.
+ */
+struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+						      struct list_head *list,
+						      const u64 seq_loss,
+						      const u8 win_loss)
+{
+	struct dccp_li_hist_entry *tail = NULL, *entry;
+	int i;
+
+	for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
+		entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
+		if (entry == NULL) {
+			dccp_li_hist_purge(hist, list);
+			return NULL;
+		}
+		entry->dccplih_seqno	 = 0;
+		entry->dccplih_win_count = 0;
+		entry->dccplih_interval	 = 0;
+		if (tail == NULL)
+			tail = entry;
+		list_add(&entry->dccplih_node, list);
+	}
+
+	/* The most recently added entry (list head) describes the loss */
+	entry->dccplih_seqno	 = seq_loss;
+	entry->dccplih_win_count = win_loss;
+	return tail;
+}
+
+EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
new file mode 100644
index 00000000000..13ad47ba142
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -0,0 +1,61 @@
+#ifndef _DCCP_LI_HIST_
+#define _DCCP_LI_HIST_
+/*
+ * net/dccp/ccids/lib/loss_interval.h
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#define DCCP_LI_HIST_IVAL_F_LENGTH 8
+
+/* Loss interval history allocator: wraps the slab cache for its entries. */
+struct dccp_li_hist {
+ kmem_cache_t *dccplih_slab;
+};
+
+extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
+extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
+
+/*
+ * One loss interval record.
+ *
+ * dccplih_node      - linkage into a loss interval list
+ * dccplih_seqno     - 48-bit sequence number recorded for the loss
+ * dccplih_win_count - 4-bit window counter recorded for the loss
+ * dccplih_interval  - interval value used by dccp_li_hist_calc_i_mean()
+ */
+struct dccp_li_hist_entry {
+ struct list_head dccplih_node;
+ u64 dccplih_seqno:48,
+ dccplih_win_count:4;
+ u32 dccplih_interval;
+};
+
+/*
+ * Allocate one loss interval entry from the allocator's slab cache.
+ * The entry is returned as the cache hands it out (not initialised here);
+ * NULL is returned when the allocation fails.
+ */
+static inline struct dccp_li_hist_entry *
+	dccp_li_hist_entry_new(struct dccp_li_hist *hist,
+			       const unsigned int __nocast prio)
+{
+	struct dccp_li_hist_entry *entry;
+
+	entry = kmem_cache_alloc(hist->dccplih_slab, prio);
+	return entry;
+}
+
+/* Return a loss interval entry to the slab cache; NULL is ignored. */
+static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
+					     struct dccp_li_hist_entry *entry)
+{
+	if (entry == NULL)
+		return;
+
+	kmem_cache_free(hist->dccplih_slab, entry);
+}
+
+extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
+ struct list_head *list);
+
+extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+
+extern struct dccp_li_hist_entry *
+ dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+ struct list_head *list,
+ const u64 seq_loss,
+ const u8 win_loss);
+#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
new file mode 100644
index 00000000000..d3f9d205383
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -0,0 +1,398 @@
+/*
+ * net/dccp/ccids/lib/packet_history.c
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ * An implementation of the DCCP protocol
+ *
+ * This code has been developed by the University of Waikato WAND
+ * research group. For further information please see http://www.wand.net.nz/
+ * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ * This code also uses code from Lulea University, rereleased as GPL by its
+ * authors:
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ * and to make it work as a loadable module in the DCCP stack written by
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include "packet_history.h"
+
+/*
+ * Create a receive history allocator backed by a slab cache named
+ * "rx_hist_<name>". Returns the new allocator, or NULL when any of the
+ * allocations fails (nothing is leaked in that case).
+ */
+struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
+{
+	static const char dccp_rx_hist_mask[] = "rx_hist_%s";
+	struct dccp_rx_hist *hist;
+	char *slab_name;
+
+	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+	if (hist == NULL)
+		return NULL;
+
+	slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
+			    GFP_ATOMIC);
+	if (slab_name == NULL)
+		goto free_hist;
+
+	sprintf(slab_name, dccp_rx_hist_mask, name);
+	hist->dccprxh_slab = kmem_cache_create(slab_name,
+					     sizeof(struct dccp_rx_hist_entry),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+	if (hist->dccprxh_slab != NULL)
+		return hist;
+
+	kfree(slab_name);
+free_hist:
+	kfree(hist);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
+
+/*
+ * Destroy a receive history allocator: the slab cache, the cache name
+ * string obtained from kmem_cache_name(), and the allocator itself.
+ */
+void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
+{
+	kmem_cache_t *slab = hist->dccprxh_slab;
+	const char *slab_name = kmem_cache_name(slab);
+
+	kmem_cache_destroy(slab);
+	kfree(slab_name);
+	kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
+
+/* Unlink every entry of a receive history list and return it to the slab. */
+void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct dccp_rx_hist_entry *victim =
+			list_entry(list->next, struct dccp_rx_hist_entry,
+				   dccphrx_node);
+
+		list_del_init(&victim->dccphrx_node);
+		kmem_cache_free(hist->dccprxh_slab, victim);
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
+
+/*
+ * Return the first entry on @list whose packet type is Data or DataAck,
+ * or NULL when the list holds no such entry.
+ */
+struct dccp_rx_hist_entry *
+		dccp_rx_hist_find_data_packet(const struct list_head *list)
+{
+	struct dccp_rx_hist_entry *entry;
+
+	list_for_each_entry(entry, list, dccphrx_node) {
+		if (entry->dccphrx_type == DCCP_PKT_DATA ||
+		    entry->dccphrx_type == DCCP_PKT_DATAACK)
+			return entry;
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
+
+/*
+ * Insert a newly received packet into the receive history and then trim
+ * the history.
+ *
+ * @hist:    allocator used to free entries that are dropped
+ * @rx_list: receive history list
+ * @li_list: loss interval list; only checked for emptiness to choose the
+ *           trimming strategy
+ * @packet:  entry to insert; ownership passes to this function
+ *
+ * Returns 1 when the packet was discarded (and freed) because
+ * TFRC_RECV_NUM_LATE_LOSS data packets that are not older than it were
+ * already found in the history; returns 0 otherwise.
+ *
+ * NOTE(review): the comments below assume the list is kept newest-first
+ * and that dccp_rx_hist_add_entry() links the packet directly after the
+ * given position; its definition is not visible here - confirm.
+ */
+int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+ struct list_head *rx_list,
+ struct list_head *li_list,
+ struct dccp_rx_hist_entry *packet)
+{
+ struct dccp_rx_hist_entry *entry, *next, *iter;
+ u8 num_later = 0;
+
+ iter = dccp_rx_hist_head(rx_list);
+ if (iter == NULL)
+ dccp_rx_hist_add_entry(rx_list, packet);
+ else {
+ const u64 seqno = packet->dccphrx_seqno;
+
+ /* Newer than the current head: goes to the front */
+ if (after48(seqno, iter->dccphrx_seqno))
+ dccp_rx_hist_add_entry(rx_list, packet);
+ else {
+ /* Count data packets that are not older than this one */
+ if (dccp_rx_hist_entry_data_packet(iter))
+ num_later = 1;
+
+ list_for_each_entry_continue(iter, rx_list,
+ dccphrx_node) {
+ if (after48(seqno, iter->dccphrx_seqno)) {
+ dccp_rx_hist_add_entry(&iter->dccphrx_node,
+ packet);
+ goto trim_history;
+ }
+
+ if (dccp_rx_hist_entry_data_packet(iter))
+ num_later++;
+
+ /* Arrived too late: drop it and tell the caller */
+ if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
+ dccp_rx_hist_entry_delete(hist, packet);
+ return 1;
+ }
+ }
+
+ if (num_later < TFRC_RECV_NUM_LATE_LOSS)
+ dccp_rx_hist_add_entry(rx_list, packet);
+ /*
+ * FIXME: else what? should we destroy the packet
+ * like above?
+ */
+ }
+ }
+
+trim_history:
+ /*
+ * Trim history (remove all packets after the NUM_LATE_LOSS + 1
+ * data packets)
+ */
+ num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
+
+ if (!list_empty(li_list)) {
+ /* Loss history exists: keep only the leading data packets */
+ list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+ if (num_later == 0) {
+ list_del_init(&entry->dccphrx_node);
+ dccp_rx_hist_entry_delete(hist, entry);
+ } else if (dccp_rx_hist_entry_data_packet(entry))
+ --num_later;
+ }
+ } else {
+ int step = 0;
+ u8 win_count = 0; /* Not needed, but lets shut up gcc */
+ int tmp;
+ /*
+ * We have no loss interval history so we need at least one
+ * rtt:s of data packets to approximate rtt.
+ */
+ list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+ if (num_later == 0) {
+ /*
+ * step 0/1: skip one more data packet each;
+ * step 1 also records the reference window counter.
+ * step 2: keep going until the window counter
+ * is more than TFRC_WIN_COUNT_PER_RTT + 1 away.
+ * step 3: delete everything that follows.
+ */
+ switch (step) {
+ case 0:
+ step = 1;
+ /* OK, find next data packet */
+ num_later = 1;
+ break;
+ case 1:
+ step = 2;
+ /* OK, find next data packet */
+ num_later = 1;
+ win_count = entry->dccphrx_ccval;
+ break;
+ case 2:
+ tmp = win_count - entry->dccphrx_ccval;
+ if (tmp < 0)
+ tmp += TFRC_WIN_COUNT_LIMIT;
+ if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
+ /*
+ * We have found a packet older
+ * than one rtt remove the rest
+ */
+ step = 3;
+ } else /* OK, find next data packet */
+ num_later = 1;
+ break;
+ case 3:
+ list_del_init(&entry->dccphrx_node);
+ dccp_rx_hist_entry_delete(hist, entry);
+ break;
+ }
+ } else if (dccp_rx_hist_entry_data_packet(entry))
+ --num_later;
+ }
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
+
+/*
+ * dccp_rx_hist_detect_loss - scan the receive history for a lost data packet
+ * @rx_list:  receive history list, walked from its head
+ * @li_list:  loss interval list; only tested for emptiness here, to pick
+ *            which debug message to print when the history is too short
+ * @win_loss: out parameter; set to the ccval of the entry that bounds the
+ *            search (a_loss) when a loss is reported, otherwise set to 0
+ *
+ * Returns the sequence number that follows the history entry at which a gap
+ * was detected, or DCCP_MAX_SEQNO + 1 when no loss was found (including the
+ * case where the history does not hold enough data packets to decide).
+ */
+u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
+			     struct list_head *li_list, u8 *win_loss)
+{
+	struct dccp_rx_hist_entry *entry, *next, *packet;
+	struct dccp_rx_hist_entry *a_loss = NULL;
+	struct dccp_rx_hist_entry *b_loss = NULL;
+	u64 seq_loss = DCCP_MAX_SEQNO + 1;	/* sentinel: "no loss" */
+	u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
+
+	/*
+	 * Skip TFRC_RECV_NUM_LATE_LOSS data packets from the head; the entry
+	 * reached once they have all been counted is the start of the search.
+	 */
+	list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
+		if (num_later == 0) {
+			b_loss = entry;
+			break;
+		} else if (dccp_rx_hist_entry_data_packet(entry))
+			--num_later;
+	}
+
+	if (b_loss == NULL)
+		goto out;
+
+	/* Continue past one more data packet to find the end of the search */
+	num_later = 1;
+	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
+		if (num_later == 0) {
+			a_loss = entry;
+			break;
+		} else if (dccp_rx_hist_entry_data_packet(entry))
+			--num_later;
+	}
+
+	if (a_loss == NULL) {
+		if (list_empty(li_list)) {
+			/* no loss event has occurred yet */
+			LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
+				       "comparing to initial seqno\n",
+				       __FUNCTION__);
+			goto out;
+		} else {
+			/* newline-terminated so the next log line starts fresh */
+			LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!\n",
+				       __FUNCTION__);
+			goto out;
+		}
+	}
+
+	/*
+	 * Locate a lost data packet: walk the entries between b_loss and
+	 * a_loss pairwise and compare the sequence number distance of each
+	 * pair with the difference of their non-data-packet (ndp) counts.
+	 */
+	entry = packet = b_loss;
+	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
+		u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
+					     packet->dccphrx_seqno);
+
+		if (delta != 0) {
+			if (dccp_rx_hist_entry_data_packet(packet))
+				--delta;
+			/*
+			 * FIXME: check this, probably this % usage is because
+			 * in earlier drafts the ndp count was just 8 bits
+			 * long, but now it can be up to 24 bits long.
+			 */
+#if 0
+			if (delta % DCCP_NDP_LIMIT !=
+			    (packet->dccphrx_ndp -
+			     entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
+#endif
+			if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
+				seq_loss = entry->dccphrx_seqno;
+				dccp_inc_seqno(&seq_loss);
+			}
+		}
+		packet = entry;
+		if (packet == a_loss)
+			break;
+	}
+out:
+	/* seq_loss is only changed after a_loss was found, so a_loss != NULL */
+	if (seq_loss != DCCP_MAX_SEQNO + 1)
+		*win_loss = a_loss->dccphrx_ccval;
+	else
+		*win_loss = 0; /* Paranoia */
+
+	return seq_loss;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
+
+/*
+ * Allocate a TX history descriptor together with a slab cache named
+ * "tx_hist_<name>" for its entries. Returns NULL if any allocation fails;
+ * everything allocated up to that point is released again.
+ */
+struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
+{
+	static const char dccp_tx_hist_mask[] = "tx_hist_%s";
+	struct dccp_tx_hist *hist;
+	char *slab_name;
+
+	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+	if (hist == NULL)
+		return NULL;
+
+	/*
+	 * sizeof(mask) counts the NUL and the two bytes of "%s", so this is
+	 * enough room for the expanded name plus its terminator.
+	 */
+	slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
+			    GFP_ATOMIC);
+	if (slab_name == NULL)
+		goto out_free_hist;
+
+	sprintf(slab_name, dccp_tx_hist_mask, name);
+	hist->dccptxh_slab = kmem_cache_create(slab_name,
+					       sizeof(struct dccp_tx_hist_entry),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+	if (hist->dccptxh_slab != NULL)
+		return hist;
+
+	kfree(slab_name);
+out_free_hist:
+	kfree(hist);
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
+
+/*
+ * Tear down a TX history descriptor: destroy its slab cache, then free the
+ * cache's name string and the descriptor itself.
+ */
+void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
+{
+	/* Fetch the name pointer first: the cache is gone after destroy */
+	const char *slab_name = kmem_cache_name(hist->dccptxh_slab);
+
+	kmem_cache_destroy(hist->dccptxh_slab);
+	kfree(slab_name);
+	kfree(hist);
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
+
+/*
+ * Return the first entry on @list whose sequence number equals @seq,
+ * or NULL when no entry matches.
+ */
+struct dccp_tx_hist_entry *
+	dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
+{
+	struct dccp_tx_hist_entry *cur;
+
+	list_for_each_entry(cur, list, dccphtx_node)
+		if (cur->dccphtx_seqno == seq)
+			return cur;
+
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
+
+/*
+ * Unlink and free every entry that follows @packet on @list. @packet itself
+ * stays on the list: the walk starts at its successor.
+ */
+void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+			      struct list_head *list,
+			      struct dccp_tx_hist_entry *packet)
+{
+	struct dccp_tx_hist_entry *following;
+
+	/* @packet doubles as the cursor, so it is overwritten by the walk */
+	list_for_each_entry_safe_continue(packet, following, list,
+					  dccphtx_node) {
+		list_del_init(&packet->dccphtx_node);
+		dccp_tx_hist_entry_delete(hist, packet);
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+
+/*
+ * Empty @list: unlink every entry and hand it back to the slab cache of
+ * @hist.
+ */
+void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
+{
+	/* Pop from the front until nothing is left */
+	while (!list_empty(list)) {
+		struct dccp_tx_hist_entry *first =
+			list_entry(list->next, struct dccp_tx_hist_entry,
+				   dccphtx_node);
+
+		list_del_init(&first->dccphtx_node);
+		dccp_tx_hist_entry_delete(hist, first);
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
+
+MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+ "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
+MODULE_DESCRIPTION("DCCP TFRC library");
+MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
new file mode 100644
index 00000000000..fb90a91aa93
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -0,0 +1,199 @@
+/*
+ * net/dccp/ccids/lib/packet_history.h
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ * An implementation of the DCCP protocol
+ *
+ * This code has been developed by the University of Waikato WAND
+ * research group. For further information please see http://www.wand.net.nz/
+ * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ * This code also uses code from Lulea University, rereleased as GPL by its
+ * authors:
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ * and to make it work as a loadable module in the DCCP stack written by
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DCCP_PKT_HIST_
+#define _DCCP_PKT_HIST_
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include "../../dccp.h"
+
+/* Number of later packets received before one is considered lost */
+#define TFRC_RECV_NUM_LATE_LOSS 3
+
+#define TFRC_WIN_COUNT_PER_RTT 4
+#define TFRC_WIN_COUNT_LIMIT 16
+
+/*
+ * One entry of a sender-side (TX) packet history list.
+ * Entries are allocated from the slab cache of a struct dccp_tx_hist.
+ */
+struct dccp_tx_hist_entry {
+ struct list_head dccphtx_node; /* linkage into the history list */
+ u64 dccphtx_seqno:48, /* sequence number, key for dccp_tx_hist_find_entry() */
+ dccphtx_ccval:4, /* 4-bit value; presumably the CCVal header field - TODO confirm */
+ dccphtx_sent:1; /* cleared to 0 by dccp_tx_hist_entry_new() */
+ u32 dccphtx_rtt; /* NOTE(review): units are not visible here, confirm with users */
+ struct timeval dccphtx_tstamp; /* NOTE(review): set by callers, not by entry_new() */
+};
+
+/*
+ * One entry of a receiver-side (RX) packet history list.
+ * All fields are filled in by dccp_rx_hist_entry_new().
+ */
+struct dccp_rx_hist_entry {
+ struct list_head dccphrx_node; /* linkage into the history list */
+ u64 dccphrx_seqno:48, /* copied from DCCP_SKB_CB(skb)->dccpd_seq */
+ dccphrx_ccval:4, /* copied from the header's dccph_ccval */
+ dccphrx_type:4; /* copied from the header's dccph_type */
+ u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
+ struct timeval dccphrx_tstamp; /* do_gettimeofday() at entry creation */
+};
+
+/* TX history descriptor: owns the slab cache TX entries are allocated from */
+struct dccp_tx_hist {
+ kmem_cache_t *dccptxh_slab;
+};
+
+extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
+extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
+
+/* RX history descriptor: owns the slab cache RX entries are allocated from */
+struct dccp_rx_hist {
+ kmem_cache_t *dccprxh_slab;
+};
+
+extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
+extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
+extern struct dccp_rx_hist_entry *
+ dccp_rx_hist_find_data_packet(const struct list_head *list);
+
+/*
+ * Allocate a TX history entry from the slab cache of @hist using allocation
+ * flags @prio. Only dccphtx_sent is initialised (to 0); returns NULL when
+ * the allocation fails.
+ */
+static inline struct dccp_tx_hist_entry *
+	dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
+			       const unsigned int __nocast prio)
+{
+	struct dccp_tx_hist_entry *fresh;
+
+	fresh = kmem_cache_alloc(hist->dccptxh_slab, prio);
+	if (fresh == NULL)
+		return NULL;
+
+	fresh->dccphtx_sent = 0;
+	return fresh;
+}
+
+/* Return @entry to the slab cache of @hist; a NULL @entry is ignored. */
+static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
+					     struct dccp_tx_hist_entry *entry)
+{
+	if (entry == NULL)
+		return;
+
+	kmem_cache_free(hist->dccptxh_slab, entry);
+}
+
+extern struct dccp_tx_hist_entry *
+ dccp_tx_hist_find_entry(const struct list_head *list,
+ const u64 seq);
+
+/* Insert @entry right after @list, i.e. at the front when @list is the head. */
+static inline void dccp_tx_hist_add_entry(struct list_head *list,
+					  struct dccp_tx_hist_entry *entry)
+{
+	struct list_head *node = &entry->dccphtx_node;
+
+	list_add(node, list);
+}
+
+extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
+ struct list_head *list,
+ struct dccp_tx_hist_entry *next);
+
+extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
+ struct list_head *list);
+
+/* Return the first entry on @list, or NULL when the list is empty. */
+static inline struct dccp_tx_hist_entry *
+	dccp_tx_hist_head(struct list_head *list)
+{
+	if (list_empty(list))
+		return NULL;
+
+	return list_entry(list->next, struct dccp_tx_hist_entry,
+			  dccphtx_node);
+}
+
+/*
+ * Allocate an RX history entry from the slab cache of @hist (allocation
+ * flags @prio) and fill it in from @skb: sequence number from the skb
+ * control block, ccval and packet type from the DCCP header, @ndp as passed
+ * in, and the current time of day as timestamp.
+ * Returns NULL when the allocation fails.
+ */
+static inline struct dccp_rx_hist_entry *
+	dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
+			       const u32 ndp,
+			       const struct sk_buff *skb,
+			       const unsigned int __nocast prio)
+{
+	const struct dccp_hdr *hdr;
+	struct dccp_rx_hist_entry *fresh;
+
+	fresh = kmem_cache_alloc(hist->dccprxh_slab, prio);
+	if (fresh == NULL)
+		return NULL;
+
+	hdr = dccp_hdr(skb);
+
+	fresh->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	fresh->dccphrx_ccval = hdr->dccph_ccval;
+	fresh->dccphrx_type  = hdr->dccph_type;
+	fresh->dccphrx_ndp   = ndp;
+	do_gettimeofday(&(fresh->dccphrx_tstamp));
+
+	return fresh;
+}
+
+/* Return @entry to the slab cache of @hist; a NULL @entry is ignored. */
+static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
+					     struct dccp_rx_hist_entry *entry)
+{
+	if (entry == NULL)
+		return;
+
+	kmem_cache_free(hist->dccprxh_slab, entry);
+}
+
+extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
+ struct list_head *list);
+
+/* Insert @entry right after @list, i.e. at the front when @list is the head. */
+static inline void dccp_rx_hist_add_entry(struct list_head *list,
+					  struct dccp_rx_hist_entry *entry)
+{
+	struct list_head *node = &entry->dccphrx_node;
+
+	list_add(node, list);
+}
+
+/* Return the first entry on @list, or NULL when the list is empty. */
+static inline struct dccp_rx_hist_entry *
+	dccp_rx_hist_head(struct list_head *list)
+{
+	if (list_empty(list))
+		return NULL;
+
+	return list_entry(list->next, struct dccp_rx_hist_entry,
+			  dccphrx_node);
+}
+
+/* Return 1 if @entry records a Data or DataAck packet, 0 otherwise. */
+static inline int
+	dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
+{
+	switch (entry->dccphrx_type) {
+	case DCCP_PKT_DATA:
+	case DCCP_PKT_DATAACK:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+ struct list_head *rx_list,
+ struct list_head *li_list,
+ struct dccp_rx_hist_entry *packet);
+
+extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
+ struct list_head *li_list, u8 *win_loss);
+
+#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
new file mode 100644
index 00000000000..130c4c40cfe
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -0,0 +1,22 @@
+#ifndef _TFRC_H_
+#define _TFRC_H_
+/*
+ * net/dccp/ccids/lib/tfrc.h
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/types.h>
+
+extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
+extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+
+#endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
new file mode 100644
index 00000000000..d2b5933b451
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -0,0 +1,644 @@
+/*
+ * net/dccp/ccids/lib/tfrc_equation.c
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <asm/bug.h>
+#include <asm/div64.h>
+
+#include "tfrc.h"
+
+#define TFRC_CALC_X_ARRSIZE 500
+
+#define TFRC_CALC_X_SPLIT 50000
+/* equivalent to 0.05 */
+
+static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
+ { 37172, 8172 },
+ { 53499, 11567 },
+ { 66664, 14180 },
+ { 78298, 16388 },
+ { 89021, 18339 },
+ { 99147, 20108 },
+ { 108858, 21738 },
+ { 118273, 23260 },
+ { 127474, 24693 },
+ { 136520, 26052 },
+ { 145456, 27348 },
+ { 154316, 28589 },
+ { 163130, 29783 },
+ { 171919, 30935 },
+ { 180704, 32049 },
+ { 189502, 33130 },
+ { 198328, 34180 },
+ { 207194, 35202 },
+ { 216114, 36198 },
+ { 225097, 37172 },
+ { 234153, 38123 },
+ { 243294, 39055 },
+ { 252527, 39968 },
+ { 261861, 40864 },
+ { 271305, 41743 },
+ { 280866, 42607 },
+ { 290553, 43457 },
+ { 300372, 44293 },
+ { 310333, 45117 },
+ { 320441, 45929 },
+ { 330705, 46729 },
+ { 341131, 47518 },
+ { 351728, 48297 },
+ { 362501, 49066 },
+ { 373460, 49826 },
+ { 384609, 50577 },
+ { 395958, 51320 },
+ { 407513, 52054 },
+ { 419281, 52780 },
+ { 431270, 53499 },
+ { 443487, 54211 },
+ { 455940, 54916 },
+ { 468635, 55614 },
+ { 481581, 56306 },
+ { 494785, 56991 },
+ { 508254, 57671 },
+ { 521996, 58345 },
+ { 536019, 59014 },
+ { 550331, 59677 },
+ { 564939, 60335 },
+ { 579851, 60988 },
+ { 595075, 61636 },
+ { 610619, 62279 },
+ { 626491, 62918 },
+ { 642700, 63553 },
+ { 659253, 64183 },
+ { 676158, 64809 },
+ { 693424, 65431 },
+ { 711060, 66050 },
+ { 729073, 66664 },
+ { 747472, 67275 },
+ { 766266, 67882 },
+ { 785464, 68486 },
+ { 805073, 69087 },
+ { 825103, 69684 },
+ { 845562, 70278 },
+ { 866460, 70868 },
+ { 887805, 71456 },
+ { 909606, 72041 },
+ { 931873, 72623 },
+ { 954614, 73202 },
+ { 977839, 73778 },
+ { 1001557, 74352 },
+ { 1025777, 74923 },
+ { 1050508, 75492 },
+ { 1075761, 76058 },
+ { 1101544, 76621 },
+ { 1127867, 77183 },
+ { 1154739, 77741 },
+ { 1182172, 78298 },
+ { 1210173, 78852 },
+ { 1238753, 79405 },
+ { 1267922, 79955 },
+ { 1297689, 80503 },
+ { 1328066, 81049 },
+ { 1359060, 81593 },
+ { 1390684, 82135 },
+ { 1422947, 82675 },
+ { 1455859, 83213 },
+ { 1489430, 83750 },
+ { 1523671, 84284 },
+ { 1558593, 84817 },
+ { 1594205, 85348 },
+ { 1630518, 85878 },
+ { 1667543, 86406 },
+ { 1705290, 86932 },
+ { 1743770, 87457 },
+ { 1782994, 87980 },
+ { 1822973, 88501 },
+ { 1863717, 89021 },
+ { 1905237, 89540 },
+ { 1947545, 90057 },
+ { 1990650, 90573 },
+ { 2034566, 91087 },
+ { 2079301, 91600 },
+ { 2124869, 92111 },
+ { 2171279, 92622 },
+ { 2218543, 93131 },
+ { 2266673, 93639 },
+ { 2315680, 94145 },
+ { 2365575, 94650 },
+ { 2416371, 95154 },
+ { 2468077, 95657 },
+ { 2520707, 96159 },
+ { 2574271, 96660 },
+ { 2628782, 97159 },
+ { 2684250, 97658 },
+ { 2740689, 98155 },
+ { 2798110, 98651 },
+ { 2856524, 99147 },
+ { 2915944, 99641 },
+ { 2976382, 100134 },
+ { 3037850, 100626 },
+ { 3100360, 101117 },
+ { 3163924, 101608 },
+ { 3228554, 102097 },
+ { 3294263, 102586 },
+ { 3361063, 103073 },
+ { 3428966, 103560 },
+ { 3497984, 104045 },
+ { 3568131, 104530 },
+ { 3639419, 105014 },
+ { 3711860, 105498 },
+ { 3785467, 105980 },
+ { 3860253, 106462 },
+ { 3936229, 106942 },
+ { 4013410, 107422 },
+ { 4091808, 107902 },
+ { 4171435, 108380 },
+ { 4252306, 108858 },
+ { 4334431, 109335 },
+ { 4417825, 109811 },
+ { 4502501, 110287 },
+ { 4588472, 110762 },
+ { 4675750, 111236 },
+ { 4764349, 111709 },
+ { 4854283, 112182 },
+ { 4945564, 112654 },
+ { 5038206, 113126 },
+ { 5132223, 113597 },
+ { 5227627, 114067 },
+ { 5324432, 114537 },
+ { 5422652, 115006 },
+ { 5522299, 115474 },
+ { 5623389, 115942 },
+ { 5725934, 116409 },
+ { 5829948, 116876 },
+ { 5935446, 117342 },
+ { 6042439, 117808 },
+ { 6150943, 118273 },
+ { 6260972, 118738 },
+ { 6372538, 119202 },
+ { 6485657, 119665 },
+ { 6600342, 120128 },
+ { 6716607, 120591 },
+ { 6834467, 121053 },
+ { 6953935, 121514 },
+ { 7075025, 121976 },
+ { 7197752, 122436 },
+ { 7322131, 122896 },
+ { 7448175, 123356 },
+ { 7575898, 123815 },
+ { 7705316, 124274 },
+ { 7836442, 124733 },
+ { 7969291, 125191 },
+ { 8103877, 125648 },
+ { 8240216, 126105 },
+ { 8378321, 126562 },
+ { 8518208, 127018 },
+ { 8659890, 127474 },
+ { 8803384, 127930 },
+ { 8948702, 128385 },
+ { 9095861, 128840 },
+ { 9244875, 129294 },
+ { 9395760, 129748 },
+ { 9548529, 130202 },
+ { 9703198, 130655 },
+ { 9859782, 131108 },
+ { 10018296, 131561 },
+ { 10178755, 132014 },
+ { 10341174, 132466 },
+ { 10505569, 132917 },
+ { 10671954, 133369 },
+ { 10840345, 133820 },
+ { 11010757, 134271 },
+ { 11183206, 134721 },
+ { 11357706, 135171 },
+ { 11534274, 135621 },
+ { 11712924, 136071 },
+ { 11893673, 136520 },
+ { 12076536, 136969 },
+ { 12261527, 137418 },
+ { 12448664, 137867 },
+ { 12637961, 138315 },
+ { 12829435, 138763 },
+ { 13023101, 139211 },
+ { 13218974, 139658 },
+ { 13417071, 140106 },
+ { 13617407, 140553 },
+ { 13819999, 140999 },
+ { 14024862, 141446 },
+ { 14232012, 141892 },
+ { 14441465, 142339 },
+ { 14653238, 142785 },
+ { 14867346, 143230 },
+ { 15083805, 143676 },
+ { 15302632, 144121 },
+ { 15523842, 144566 },
+ { 15747453, 145011 },
+ { 15973479, 145456 },
+ { 16201939, 145900 },
+ { 16432847, 146345 },
+ { 16666221, 146789 },
+ { 16902076, 147233 },
+ { 17140429, 147677 },
+ { 17381297, 148121 },
+ { 17624696, 148564 },
+ { 17870643, 149007 },
+ { 18119154, 149451 },
+ { 18370247, 149894 },
+ { 18623936, 150336 },
+ { 18880241, 150779 },
+ { 19139176, 151222 },
+ { 19400759, 151664 },
+ { 19665007, 152107 },
+ { 19931936, 152549 },
+ { 20201564, 152991 },
+ { 20473907, 153433 },
+ { 20748982, 153875 },
+ { 21026807, 154316 },
+ { 21307399, 154758 },
+ { 21590773, 155199 },
+ { 21876949, 155641 },
+ { 22165941, 156082 },
+ { 22457769, 156523 },
+ { 22752449, 156964 },
+ { 23049999, 157405 },
+ { 23350435, 157846 },
+ { 23653774, 158287 },
+ { 23960036, 158727 },
+ { 24269236, 159168 },
+ { 24581392, 159608 },
+ { 24896521, 160049 },
+ { 25214642, 160489 },
+ { 25535772, 160929 },
+ { 25859927, 161370 },
+ { 26187127, 161810 },
+ { 26517388, 162250 },
+ { 26850728, 162690 },
+ { 27187165, 163130 },
+ { 27526716, 163569 },
+ { 27869400, 164009 },
+ { 28215234, 164449 },
+ { 28564236, 164889 },
+ { 28916423, 165328 },
+ { 29271815, 165768 },
+ { 29630428, 166208 },
+ { 29992281, 166647 },
+ { 30357392, 167087 },
+ { 30725779, 167526 },
+ { 31097459, 167965 },
+ { 31472452, 168405 },
+ { 31850774, 168844 },
+ { 32232445, 169283 },
+ { 32617482, 169723 },
+ { 33005904, 170162 },
+ { 33397730, 170601 },
+ { 33792976, 171041 },
+ { 34191663, 171480 },
+ { 34593807, 171919 },
+ { 34999428, 172358 },
+ { 35408544, 172797 },
+ { 35821174, 173237 },
+ { 36237335, 173676 },
+ { 36657047, 174115 },
+ { 37080329, 174554 },
+ { 37507197, 174993 },
+ { 37937673, 175433 },
+ { 38371773, 175872 },
+ { 38809517, 176311 },
+ { 39250924, 176750 },
+ { 39696012, 177190 },
+ { 40144800, 177629 },
+ { 40597308, 178068 },
+ { 41053553, 178507 },
+ { 41513554, 178947 },
+ { 41977332, 179386 },
+ { 42444904, 179825 },
+ { 42916290, 180265 },
+ { 43391509, 180704 },
+ { 43870579, 181144 },
+ { 44353520, 181583 },
+ { 44840352, 182023 },
+ { 45331092, 182462 },
+ { 45825761, 182902 },
+ { 46324378, 183342 },
+ { 46826961, 183781 },
+ { 47333531, 184221 },
+ { 47844106, 184661 },
+ { 48358706, 185101 },
+ { 48877350, 185541 },
+ { 49400058, 185981 },
+ { 49926849, 186421 },
+ { 50457743, 186861 },
+ { 50992759, 187301 },
+ { 51531916, 187741 },
+ { 52075235, 188181 },
+ { 52622735, 188622 },
+ { 53174435, 189062 },
+ { 53730355, 189502 },
+ { 54290515, 189943 },
+ { 54854935, 190383 },
+ { 55423634, 190824 },
+ { 55996633, 191265 },
+ { 56573950, 191706 },
+ { 57155606, 192146 },
+ { 57741621, 192587 },
+ { 58332014, 193028 },
+ { 58926806, 193470 },
+ { 59526017, 193911 },
+ { 60129666, 194352 },
+ { 60737774, 194793 },
+ { 61350361, 195235 },
+ { 61967446, 195677 },
+ { 62589050, 196118 },
+ { 63215194, 196560 },
+ { 63845897, 197002 },
+ { 64481179, 197444 },
+ { 65121061, 197886 },
+ { 65765563, 198328 },
+ { 66414705, 198770 },
+ { 67068508, 199213 },
+ { 67726992, 199655 },
+ { 68390177, 200098 },
+ { 69058085, 200540 },
+ { 69730735, 200983 },
+ { 70408147, 201426 },
+ { 71090343, 201869 },
+ { 71777343, 202312 },
+ { 72469168, 202755 },
+ { 73165837, 203199 },
+ { 73867373, 203642 },
+ { 74573795, 204086 },
+ { 75285124, 204529 },
+ { 76001380, 204973 },
+ { 76722586, 205417 },
+ { 77448761, 205861 },
+ { 78179926, 206306 },
+ { 78916102, 206750 },
+ { 79657310, 207194 },
+ { 80403571, 207639 },
+ { 81154906, 208084 },
+ { 81911335, 208529 },
+ { 82672880, 208974 },
+ { 83439562, 209419 },
+ { 84211402, 209864 },
+ { 84988421, 210309 },
+ { 85770640, 210755 },
+ { 86558080, 211201 },
+ { 87350762, 211647 },
+ { 88148708, 212093 },
+ { 88951938, 212539 },
+ { 89760475, 212985 },
+ { 90574339, 213432 },
+ { 91393551, 213878 },
+ { 92218133, 214325 },
+ { 93048107, 214772 },
+ { 93883493, 215219 },
+ { 94724314, 215666 },
+ { 95570590, 216114 },
+ { 96422343, 216561 },
+ { 97279594, 217009 },
+ { 98142366, 217457 },
+ { 99010679, 217905 },
+ { 99884556, 218353 },
+ { 100764018, 218801 },
+ { 101649086, 219250 },
+ { 102539782, 219698 },
+ { 103436128, 220147 },
+ { 104338146, 220596 },
+ { 105245857, 221046 },
+ { 106159284, 221495 },
+ { 107078448, 221945 },
+ { 108003370, 222394 },
+ { 108934074, 222844 },
+ { 109870580, 223294 },
+ { 110812910, 223745 },
+ { 111761087, 224195 },
+ { 112715133, 224646 },
+ { 113675069, 225097 },
+ { 114640918, 225548 },
+ { 115612702, 225999 },
+ { 116590442, 226450 },
+ { 117574162, 226902 },
+ { 118563882, 227353 },
+ { 119559626, 227805 },
+ { 120561415, 228258 },
+ { 121569272, 228710 },
+ { 122583219, 229162 },
+ { 123603278, 229615 },
+ { 124629471, 230068 },
+ { 125661822, 230521 },
+ { 126700352, 230974 },
+ { 127745083, 231428 },
+ { 128796039, 231882 },
+ { 129853241, 232336 },
+ { 130916713, 232790 },
+ { 131986475, 233244 },
+ { 133062553, 233699 },
+ { 134144966, 234153 },
+ { 135233739, 234608 },
+ { 136328894, 235064 },
+ { 137430453, 235519 },
+ { 138538440, 235975 },
+ { 139652876, 236430 },
+ { 140773786, 236886 },
+ { 141901190, 237343 },
+ { 143035113, 237799 },
+ { 144175576, 238256 },
+ { 145322604, 238713 },
+ { 146476218, 239170 },
+ { 147636442, 239627 },
+ { 148803298, 240085 },
+ { 149976809, 240542 },
+ { 151156999, 241000 },
+ { 152343890, 241459 },
+ { 153537506, 241917 },
+ { 154737869, 242376 },
+ { 155945002, 242835 },
+ { 157158929, 243294 },
+ { 158379673, 243753 },
+ { 159607257, 244213 },
+ { 160841704, 244673 },
+ { 162083037, 245133 },
+ { 163331279, 245593 },
+ { 164586455, 246054 },
+ { 165848586, 246514 },
+ { 167117696, 246975 },
+ { 168393810, 247437 },
+ { 169676949, 247898 },
+ { 170967138, 248360 },
+ { 172264399, 248822 },
+ { 173568757, 249284 },
+ { 174880235, 249747 },
+ { 176198856, 250209 },
+ { 177524643, 250672 },
+ { 178857621, 251136 },
+ { 180197813, 251599 },
+ { 181545242, 252063 },
+ { 182899933, 252527 },
+ { 184261908, 252991 },
+ { 185631191, 253456 },
+ { 187007807, 253920 },
+ { 188391778, 254385 },
+ { 189783129, 254851 },
+ { 191181884, 255316 },
+ { 192588065, 255782 },
+ { 194001698, 256248 },
+ { 195422805, 256714 },
+ { 196851411, 257181 },
+ { 198287540, 257648 },
+ { 199731215, 258115 },
+ { 201182461, 258582 },
+ { 202641302, 259050 },
+ { 204107760, 259518 },
+ { 205581862, 259986 },
+ { 207063630, 260454 },
+ { 208553088, 260923 },
+ { 210050262, 261392 },
+ { 211555174, 261861 },
+ { 213067849, 262331 },
+ { 214588312, 262800 },
+ { 216116586, 263270 },
+ { 217652696, 263741 },
+ { 219196666, 264211 },
+ { 220748520, 264682 },
+ { 222308282, 265153 },
+ { 223875978, 265625 },
+ { 225451630, 266097 },
+ { 227035265, 266569 },
+ { 228626905, 267041 },
+ { 230226576, 267514 },
+ { 231834302, 267986 },
+ { 233450107, 268460 },
+ { 235074016, 268933 },
+ { 236706054, 269407 },
+ { 238346244, 269881 },
+ { 239994613, 270355 },
+ { 241651183, 270830 },
+ { 243315981, 271305 }
+};
+
+/* Calculate the send rate as per section 3.1 of RFC3448
+
+Returns send rate in bytes per second
+
+Integer maths and lookups are used as not allowed floating point in kernel
+
+The function for Xcalc as per section 3.1 of RFC3448 is:
+
+X =                            s
+     -------------------------------------------------------------
+     R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
+
+where
+X is the transmit rate in bytes/second
+s is the packet size in bytes
+R is the round trip time in seconds
+p is the loss event rate, between 0 and 1.0, of the number of loss events
+  as a fraction of the number of packets transmitted
+t_RTO is the TCP retransmission timeout value in seconds
+b is the number of packets acknowledged by a single TCP acknowledgement
+
+we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
+
+X =                            s
+     -----------------------------------------------------------------------
+     R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
+
+
+which we can break down into:
+
+X =     s
+     --------
+     R * f(p)
+
+where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
+
+Function parameters:
+s - bytes
+R - RTT in usecs
+p - loss rate (decimal fraction multiplied by 1,000,000)
+
+Returns Xcalc in bytes per second, saturated at the largest u32 value
+
+DON'T alter this code unless you run test cases against it as the code
+has been manipulated to stop underflow/overflow.
+
+*/
+u32 tfrc_calc_x(u16 s, u32 R, u32 p)
+{
+	int index;
+	u32 f;
+	u64 tmp1, tmp2;
+
+	/* Row i of the table holds f(p) for p = (i + 1) * step */
+	if (p < TFRC_CALC_X_SPLIT)
+		index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
+	else
+		index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
+
+	if (index < 0)
+		/* p should be 0 unless there is a bug in my code */
+		index = 0;
+
+	if (R == 0)
+		R = 1; /* RTT can't be zero or else divide by zero */
+
+	BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
+
+	/* Column 0 covers p >= TFRC_CALC_X_SPLIT, column 1 the values below */
+	if (p >= TFRC_CALC_X_SPLIT)
+		f = tfrc_calc_x_lookup[index][0];
+	else
+		f = tfrc_calc_x_lookup[index][1];
+
+	/* X = s * 10^12 / (R * f), computed as (s * 10^8) / (R * f / 10^4) */
+	tmp1 = ((u64)s * 100000000);
+	tmp2 = ((u64)R * (u64)f);
+	do_div(tmp2, 10000);
+
+	/*
+	 * R * f can be below 10000 (e.g. R == 1 with a small f), which would
+	 * leave a zero divisor. Use the smallest divisor instead; the result
+	 * is then capped by the saturation below.
+	 */
+	if (tmp2 == 0)
+		tmp2 = 1;
+
+	/*
+	 * do_div() only takes a 32 bit divisor. Scale dividend and divisor
+	 * down together until the divisor fits rather than letting it be
+	 * truncated. This loop never runs when tmp2 already fits in 32 bits,
+	 * so results are unchanged for every input that used to be valid.
+	 */
+	while (tmp2 > 0xFFFFFFFFULL) {
+		tmp1 >>= 1;
+		tmp2 >>= 1;
+	}
+
+	do_div(tmp1, (u32)tmp2);
+	/* Don't alter above math unless you test due to overflow on 32 bit */
+
+	/* Saturate instead of silently dropping the upper 32 bits */
+	if (tmp1 > 0xFFFFFFFFULL)
+		return ~0U;
+
+	return (u32)tmp1;
+}
+
+EXPORT_SYMBOL_GPL(tfrc_calc_x);
+
+/*
+ * args: fvalue - function value to match
+ * returns: p closest to that value
+ *
+ * both fvalue and p are multiplied by 1,000,000 to use ints
+ *
+ * Returns 0 when fvalue is below the smallest table entry and 1000000 when
+ * it is above the largest one. Otherwise the first row whose f value is not
+ * smaller than fvalue is located; row i holds f(p) for p = (i + 1) * step
+ * (see the index computation in tfrc_calc_x), so that is the p returned.
+ */
+u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
+{
+	int ctr = 0;
+	int small;
+
+	if (fvalue < tfrc_calc_x_lookup[0][1])
+		return 0;
+
+	/* Pick the column: 1 is the fine-grained (small p) one, 0 the coarse */
+	if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
+		small = 1;
+	else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
+		return 1000000;
+	else
+		small = 0;
+
+	/* Terminates: fvalue is known not to exceed the column's last row */
+	while (fvalue > tfrc_calc_x_lookup[ctr][small])
+		ctr++;
+
+	/* + 1 keeps this the inverse of the p -> index mapping in tfrc_calc_x */
+	if (small)
+		return TFRC_CALC_X_SPLIT * (ctr + 1) / TFRC_CALC_X_ARRSIZE;
+	else
+		return 1000000 * (ctr + 1) / TFRC_CALC_X_ARRSIZE;
+}
+
+EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644
index 00000000000..33456c0d593
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,493 @@
+#ifndef _DCCP_H
+#define _DCCP_H
+/*
+ * net/dccp/dccp.h
+ *
+ * An implementation of the DCCP protocol
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <net/snmp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern int dccp_debug;
+
+#define dccp_pr_debug(format, a...) \
+ do { if (dccp_debug) \
+ printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
+ } while (0)
+#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
+ printk(format, ##a); } while (0)
+#else
+#define dccp_pr_debug(format, a...)
+#define dccp_pr_debug_cat(format, a...)
+#endif
+
+extern struct inet_hashinfo dccp_hashinfo;
+
+extern atomic_t dccp_orphan_count;
+extern int dccp_tw_count;
+extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
+
+extern void dccp_time_wait(struct sock *sk, int state, int timeo);
+
+/* FIXME: Right size this */
+#define DCCP_MAX_OPT_LEN 128
+
+#define DCCP_MAX_PACKET_HDR 32
+
+#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+
+#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
+ * state, about 60 seconds */
+
+/* draft-ietf-dccp-spec-11.txt initial RTO value */
+#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+
+/* Maximal interval between probes for local resources. */
+#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
+
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+extern struct proto dccp_v4_prot;
+
+/*
+ * Returns nonzero when seq1 comes before seq2 in the circular 48-bit
+ * sequence space.
+ */
+static inline int before48(const u64 seq1, const u64 seq2)
+{
+	/*
+	 * Move the 48-bit values to the top of the 64-bit word so that the
+	 * subtraction wraps modulo 2^48 and the sign bit gives the ordering.
+	 */
+	const u64 hi1 = seq1 << 16;
+	const u64 hi2 = seq2 << 16;
+
+	return (s64)(hi1 - hi2) < 0;
+}
+
+/*
+ * Returns nonzero when seq1 comes after seq2 in the circular 48-bit
+ * sequence space.
+ */
+static inline int after48(const u64 seq1, const u64 seq2)
+{
+	const s64 diff = (s64)((seq2 << 16) - (seq1 << 16));
+
+	return diff < 0;
+}
+
+/*
+ * Returns nonzero when seq2 <= seq1 <= seq3, evaluated modulo 2^48 relative
+ * to seq2.
+ */
+static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
+{
+	const u64 base = seq2 << 16;
+	const u64 span = (seq3 << 16) - base;	/* size of the window */
+	const u64 offset = (seq1 << 16) - base;	/* seq1's position in it */
+
+	return span >= offset;
+}
+
+/* Returns whichever argument is later in 48-bit sequence space. */
+static inline u64 max48(const u64 seq1, const u64 seq2)
+{
+	if (after48(seq1, seq2))
+		return seq1;
+
+	return seq2;
+}
+
+enum { /* indices into struct dccp_mib's mibs[] array */
+ DCCP_MIB_NUM = 0,
+ DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
+ DCCP_MIB_ESTABRESETS, /* EstabResets: bumped by dccp_set_state() on
+  * OPEN/CLOSING -> CLOSED */
+ DCCP_MIB_CURRESTAB, /* CurrEstab: incremented on entering DCCP_OPEN,
+  * decremented on leaving it (dccp_set_state()) */
+ DCCP_MIB_OUTSEGS, /* OutSegs */
+ DCCP_MIB_OUTRSTS,
+ DCCP_MIB_ABORTONTIMEOUT,
+ DCCP_MIB_TIMEOUTS,
+ DCCP_MIB_ABORTFAILED,
+ DCCP_MIB_PASSIVEOPENS,
+ DCCP_MIB_ATTEMPTFAILS,
+ DCCP_MIB_OUTDATAGRAMS,
+ DCCP_MIB_INERRS, /* bumped by dccp_rcv_established() for packets that
+  * leave its type switch unconsumed */
+ DCCP_MIB_OPTMANDATORYERROR,
+ DCCP_MIB_INVALIDOPT,
+ __DCCP_MIB_MAX /* number of counters; must stay last */
+};
+
+#define DCCP_MIB_MAX __DCCP_MIB_MAX
+struct dccp_mib { /* DCCP statistics block used through DECLARE_SNMP_STAT() */
+ unsigned long mibs[DCCP_MIB_MAX]; /* one counter per DCCP_MIB_* index */
+} __SNMP_MIB_ALIGN__;
+
+DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
+#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
+#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
+#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_ADD_STATS_BH(field, val) \
+ SNMP_ADD_STATS_BH(dccp_statistics, field, val)
+#define DCCP_ADD_STATS_USER(field, val) \
+ SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+
+extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
+extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
+
+extern int dccp_send_response(struct sock *sk);
+extern void dccp_send_ack(struct sock *sk);
+extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_send_sync(struct sock *sk, const u64 seq,
+ const enum dccp_pkt_type pkt_type);
+
+extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_space(struct sock *sk);
+
+extern void dccp_init_xmit_timers(struct sock *sk);
+static inline void dccp_clear_xmit_timers(struct sock *sk)
+{
+ inet_csk_clear_xmit_timers(sk); /* thin wrapper around the generic
+  * inet_connection_sock helper */
+}
+
+extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
+
+extern const char *dccp_packet_name(const int type);
+extern const char *dccp_state_name(const int state);
+
+/*
+ * dccp_set_state - switch a socket to a new DCCP state
+ * @sk: socket to update
+ * @state: state to enter; warns if it equals the current one
+ *
+ * Keeps the CurrEstab/EstabResets counters in step with the transition and,
+ * on entering DCCP_CLOSED, unhashes the socket and releases its bound port
+ * unless the user locked the port binding.
+ */
+static inline void dccp_set_state(struct sock *sk, const int state)
+{
+	const int oldstate = sk->sk_state;
+
+	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
+		      dccp_role(sk), sk,
+		      dccp_state_name(oldstate), dccp_state_name(state));
+	WARN_ON(state == oldstate);
+
+	if (state == DCCP_OPEN) {
+		if (oldstate != DCCP_OPEN)
+			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+	} else {
+		if (state == DCCP_CLOSED) {
+			if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
+				DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
+
+			sk->sk_prot->unhash(sk);
+			if (inet_csk(sk)->icsk_bind_hash != NULL &&
+			    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+				inet_put_port(&dccp_hashinfo, sk);
+		}
+
+		/* Any move away from OPEN drops the established count */
+		if (oldstate == DCCP_OPEN)
+			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
+	}
+
+	/*
+	 * The state is written only after the socket has been unhashed, so
+	 * that a closed socket is never found sitting in the hash tables.
+	 */
+	sk->sk_state = state;
+}
+
+/*
+ * dccp_done - final teardown of a connection: enter DCCP_CLOSED, stop the
+ * transmit timers and mark both directions shut down.  A socket that is
+ * already dead is destroyed; otherwise its owner is notified of the state
+ * change.
+ */
+static inline void dccp_done(struct sock *sk)
+{
+	dccp_set_state(sk, DCCP_CLOSED);
+	dccp_clear_xmit_timers(sk);
+
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	if (sock_flag(sk, SOCK_DEAD)) {
+		inet_csk_destroy_sock(sk);
+		return;
+	}
+
+	sk->sk_state_change(sk);
+}
+
+/*
+ * dccp_openreq_init - initialise a request sock from an incoming packet.
+ * Only the remote port is taken from the packet so far; @dp is not used.
+ */
+static inline void dccp_openreq_init(struct request_sock *req,
+				     struct dccp_sock *dp,
+				     struct sk_buff *skb)
+{
+	req->rcv_wnd = 0;
+	inet_rsk(req)->acked = 0;
+	inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
+	/*
+	 * FIXME: fill in the other req fields from the DCCP options
+	 * received
+	 */
+}
+
+extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+
+extern struct sock *dccp_create_openreq_child(struct sock *sk,
+ const struct request_sock *req,
+ const struct sk_buff *skb);
+
+extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+
+extern void dccp_v4_err(struct sk_buff *skb, u32);
+
+extern int dccp_v4_rcv(struct sk_buff *skb);
+
+extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
+ struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst);
+extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct request_sock **prev);
+
+extern int dccp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
+extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ struct dccp_hdr *dh, unsigned len);
+extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned len);
+
+extern void dccp_close(struct sock *sk, long timeout);
+extern struct sk_buff *dccp_make_response(struct sock *sk,
+ struct dst_entry *dst,
+ struct request_sock *req);
+extern struct sk_buff *dccp_make_reset(struct sock *sk,
+ struct dst_entry *dst,
+ enum dccp_reset_codes code);
+
+extern int dccp_connect(struct sock *sk);
+extern int dccp_disconnect(struct sock *sk, int flags);
+extern int dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen);
+extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t size);
+extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len, int nonblock,
+ int flags, int *addr_len);
+extern void dccp_shutdown(struct sock *sk, int how);
+
+extern int dccp_v4_checksum(const struct sk_buff *skb,
+ const u32 saddr, const u32 daddr);
+
+extern int dccp_v4_send_reset(struct sock *sk,
+ enum dccp_reset_codes code);
+extern void dccp_send_close(struct sock *sk, const int active);
+
+struct dccp_skb_cb { /* per-packet DCCP data overlaid on skb->cb, see DCCP_SKB_CB() */
+ __u8 dccpd_type; /* packet type, compared against DCCP_PKT_* */
+ __u8 dccpd_reset_code;
+ __u8 dccpd_service;
+ __u8 dccpd_ccval;
+ __u64 dccpd_seq; /* sequence number of the packet */
+ __u64 dccpd_ack_seq; /* ack number, or DCCP_PKT_WITHOUT_ACK_SEQ if none */
+ int dccpd_opt_len;
+};
+
+#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * Returns 1 for the packet types listed below (Ack, Close, CloseReq, Reset,
+ * Sync, SyncAck) and 0 for every other type.
+ */
+static inline int dccp_non_data_packet(const struct sk_buff *skb)
+{
+	switch (DCCP_SKB_CB(skb)->dccpd_type) {
+	case DCCP_PKT_ACK:
+	case DCCP_PKT_CLOSE:
+	case DCCP_PKT_CLOSEREQ:
+	case DCCP_PKT_RESET:
+	case DCCP_PKT_SYNC:
+	case DCCP_PKT_SYNCACK:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Returns 1 for Data and Request packets, 0 for every other type. */
+static inline int dccp_packet_without_ack(const struct sk_buff *skb)
+{
+	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
+
+	if (type == DCCP_PKT_DATA)
+		return 1;
+
+	return type == DCCP_PKT_REQUEST;
+}
+
+#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
+#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
+
+/*
+ * dccp_set_seqno - store @value in *@seqno, reduced modulo 2^48
+ *
+ * Callers compute window edges with plain u64 arithmetic (for example
+ * GSR + 1 - W/4), which may run past DCCP_MAX_SEQNO or wrap below zero.
+ * Since 2^64 is a multiple of 2^48, masking yields the correct 48-bit
+ * result in both cases; subtracting 2^48 once only handled the first.
+ */
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+	*seqno = value & DCCP_MAX_SEQNO;
+}
+
+/* Returns the distance from seqno1 forward to seqno2, modulo 2^48. */
+static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
+{
+	const u64 from = seqno1 << 16;
+	const u64 to = seqno2 << 16;
+
+	return (to - from) >> 16;
+}
+
+/* Advances *seqno by one, wrapping to 0 after DCCP_MAX_SEQNO. */
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+	u64 next = *seqno + 1;
+
+	if (next > DCCP_MAX_SEQNO)
+		next = 0;
+
+	*seqno = next;
+}
+/*
+ * dccp_hdr_set_seq - store the sequence number gss in a DCCP header.
+ * The bits of gss above bit 31 are written to dh->dccph_seq and the low 32
+ * bits to dccph_seq_low of the struct dccp_hdr_ext located sizeof(*dh)
+ * bytes after dh.
+ */
+static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
+{
+ struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
+ sizeof(*dh));
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ dh->dccph_seq = htonl((gss >> 32)) >> 8; /* NOTE(review): the >> 8 presumably
+  * lines the network-order bytes up with a 24-bit bitfield -
+  * confirm against struct dccp_hdr */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ dh->dccph_seq = htonl((gss >> 32));
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ dhx->dccph_seq_low = htonl(gss & 0xffffffff);
+}
+/*
+ * dccp_hdr_set_ack - store the acknowledgement number gsr in dhack.
+ * The bits of gsr above bit 31 go to dccph_ack_nr_high, the low 32 bits to
+ * dccph_ack_nr_low; the same conversion as in dccp_hdr_set_seq() is used.
+ */
+static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
+ const u64 gsr)
+{
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ dhack->dccph_ack_nr_high = htonl((gsr >> 32));
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
+}
+/*
+ * dccp_update_gsr - set GSR to seq and recompute the sequence window edges
+ * from it, with W = dccpo_sequence_window:
+ *   SWL = GSR + 1 - W / 4
+ *   SWH = GSR + (3 * W) / 4
+ * Both edges are stored through dccp_set_seqno().
+ */
+static inline void dccp_update_gsr(struct sock *sk, u64 seq)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ dp->dccps_gsr = seq;
+ dccp_set_seqno(&dp->dccps_swl,
+ (dp->dccps_gsr + 1 -
+ (dp->dccps_options.dccpo_sequence_window / 4)));
+ dccp_set_seqno(&dp->dccps_swh,
+ (dp->dccps_gsr +
+ (3 * dp->dccps_options.dccpo_sequence_window) / 4));
+}
+/*
+ * dccp_update_gss - set GSS and AWH to seq and recompute the lower edge of
+ * the acknowledgement window, AWL = GSS - W + 1 with
+ * W = dccpo_sequence_window, stored through dccp_set_seqno().
+ */
+static inline void dccp_update_gss(struct sock *sk, u64 seq)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ dp->dccps_awh = dp->dccps_gss = seq;
+ dccp_set_seqno(&dp->dccps_awl,
+ (dp->dccps_gss -
+ dp->dccps_options.dccpo_sequence_window + 1));
+}
+
+extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern void dccp_insert_option_elapsed_time(struct sock *sk,
+ struct sk_buff *skb,
+ u32 elapsed_time);
+extern void dccp_insert_option_timestamp(struct sock *sk,
+ struct sk_buff *skb);
+extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+ unsigned char option,
+ const void *value, unsigned char len);
+
+extern struct socket *dccp_ctl_socket;
+
+#define DCCP_ACKPKTS_STATE_RECEIVED 0
+#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
+#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
+
+#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
+#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
+
+/** struct dccp_ackpkts - acknowledgeable packets
+ *
+ * This data structure is the one defined in the DCCP draft
+ * Appendix A.
+ *
+ * @dccpap_buf_head - circular buffer head
+ * @dccpap_buf_tail - circular buffer tail
+ * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ * buffer (i.e. %dccpap_buf_head)
+ * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ * by the buffer with State 0
+ *
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @dccpap_ack_seqno - the Sequence Number used for the packet
+ * (HC-Receiver seqno). The receive path stores
+ * DCCP_MAX_SEQNO + 1 here as an out-of-range marker.
+ * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
+ * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
+ * (HC-Sender seqno)
+ * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @dccpap_buf_vector_len - presumably the length of the vector currently
+ * held in the buffer (TODO confirm against
+ * dccp_ackpkts_add())
+ * @dccpap_ack_vector_len - presumably the vector length recorded when the
+ * last Ack Vector was sent (TODO confirm)
+ * @dccpap_buf_len - circular buffer length
+ * @dccpap_time - a timestamp kept as a struct timeval (usec resolution)
+ * @dccpap_buf - circular buffer of acknowledgeable packets; trailing
+ * variable-length array
+ */
+struct dccp_ackpkts {
+ unsigned int dccpap_buf_head;
+ unsigned int dccpap_buf_tail;
+ u64 dccpap_buf_ackno;
+ u64 dccpap_ack_seqno;
+ u64 dccpap_ack_ackno;
+ unsigned int dccpap_ack_ptr;
+ unsigned int dccpap_buf_vector_len;
+ unsigned int dccpap_ack_vector_len;
+ unsigned int dccpap_buf_len;
+ struct timeval dccpap_time;
+ u8 dccpap_buf_nonce;
+ u8 dccpap_ack_nonce;
+ u8 dccpap_buf[0];
+};
+
+extern struct dccp_ackpkts *
+ dccp_ackpkts_alloc(unsigned int len,
+ const unsigned int __nocast priority);
+extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
+extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
+extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
+ struct sock *sk, u64 ackno);
+/*
+ * timeval_usecs - convert *tv into a single count of microseconds.
+ * NOTE(review): the result is a suseconds_t; if that type is 32 bits wide
+ * the multiplication overflows once tv_sec exceeds roughly 2147 - confirm
+ * that callers only pass small (relative) values.
+ */
+static inline suseconds_t timeval_usecs(const struct timeval *tv)
+{
+ return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
+}
+
+/*
+ * timeval_delta - microseconds elapsed from *small to *large.
+ * A negative microsecond difference is fixed up by borrowing one second.
+ */
+static inline suseconds_t timeval_delta(const struct timeval *large,
+					const struct timeval *small)
+{
+	suseconds_t usecs = large->tv_usec - small->tv_usec;
+	time_t secs = large->tv_sec - small->tv_sec;
+
+	if (usecs < 0) {
+		usecs += USEC_PER_SEC;
+		--secs;
+	}
+
+	return secs * USEC_PER_SEC + usecs;
+}
+
+/*
+ * timeval_add_usecs - add usecs to *tv in place, carrying whole seconds out
+ * of tv_usec into tv_sec.
+ */
+static inline void timeval_add_usecs(struct timeval *tv,
+				     const suseconds_t usecs)
+{
+	tv->tv_usec += usecs;
+
+	for (; tv->tv_usec >= USEC_PER_SEC; tv->tv_usec -= USEC_PER_SEC)
+		tv->tv_sec++;
+}
+
+/*
+ * timeval_sub_usecs - subtract usecs from *tv in place, borrowing whole
+ * seconds from tv_sec while tv_usec is negative.
+ */
+static inline void timeval_sub_usecs(struct timeval *tv,
+				     const suseconds_t usecs)
+{
+	tv->tv_usec -= usecs;
+
+	for (; tv->tv_usec < 0; tv->tv_usec += USEC_PER_SEC)
+		tv->tv_sec--;
+}
+
+/*
+ * timeval_now_delta - microseconds elapsed between *tv and the current
+ * time.  Samples the clock with do_gettimeofday() and returns
+ * timeval_delta(now, tv), so the result is positive when *tv lies in the
+ * past.
+ */
+static inline suseconds_t timeval_now_delta(const struct timeval *tv)
+{
+ struct timeval now;
+ do_gettimeofday(&now);
+ return timeval_delta(&now, tv);
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+extern void dccp_ackvector_print(const u64 ackno,
+ const unsigned char *vector, int len);
+extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
+#else
+static inline void dccp_ackvector_print(const u64 ackno,
+ const unsigned char *vector,
+ int len) { /* no-op when CONFIG_IP_DCCP_DEBUG is not set */ }
+static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { /* no-op when CONFIG_IP_DCCP_DEBUG is not set */ }
+#endif
+
+#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
new file mode 100644
index 00000000000..f675d8e642d
--- /dev/null
+++ b/net/dccp/diag.c
@@ -0,0 +1,71 @@
+/*
+ * net/dccp/diag.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * dccp_get_info - fill a struct tcp_info with the DCCP socket's state.
+ * The structure is zeroed first, the generic connection fields are copied
+ * in, and finally the RX and TX CCIDs get to add their own data.
+ */
+static void dccp_get_info(struct sock *sk, struct tcp_info *info)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	memset(info, 0, sizeof(*info));
+
+	info->tcpi_pmtu = dp->dccps_pmtu_cookie;
+	info->tcpi_backoff = icsk->icsk_backoff;
+	info->tcpi_probes = icsk->icsk_probes_out;
+	info->tcpi_retransmits = icsk->icsk_retransmits;
+	info->tcpi_state = sk->sk_state;
+
+	/* Ack vectors are reported through the SACK option bit */
+	if (dp->dccps_options.dccpo_send_ack_vector)
+		info->tcpi_options |= TCPI_OPT_SACK;
+
+	ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
+	ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
+}
+
+/*
+ * dccp_diag_get_info - inet_diag callback: reports both queue lengths as
+ * zero and, when the caller supplied a buffer, fills it via dccp_get_info().
+ */
+static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+			       void *_info)
+{
+	r->idiag_wqueue = 0;
+	r->idiag_rqueue = 0;
+
+	if (_info)
+		dccp_get_info(sk, _info);
+}
+
+static struct inet_diag_handler dccp_diag_handler = { /* registered in dccp_diag_init() */
+ .idiag_hashinfo = &dccp_hashinfo,
+ .idiag_get_info = dccp_diag_get_info,
+ .idiag_type = DCCPDIAG_GETSOCK,
+ .idiag_info_size = sizeof(struct tcp_info), /* dccp_get_info() fills a struct tcp_info */
+};
+
+static int __init dccp_diag_init(void)
+{
+ return inet_diag_register(&dccp_diag_handler); /* module init: result is passed straight back */
+}
+
+static void __exit dccp_diag_fini(void)
+{
+ inet_diag_unregister(&dccp_diag_handler); /* module exit: undo dccp_diag_init() */
+}
+
+module_init(dccp_diag_init);
+module_exit(dccp_diag_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
+MODULE_DESCRIPTION("DCCP inet_diag handler");
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644
index 00000000000..ef29cef1daf
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,600 @@
+/*
+ * net/dccp/input.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+/*
+ * dccp_fin - queue skb as the last packet of the receive stream.
+ * Flags the receive side as shut down (RCV_SHUTDOWN, SOCK_DONE), pulls
+ * dccph_doff * 4 bytes off the front of the skb, appends it to
+ * sk_receive_queue, charges it to the socket and calls sk_data_ready().
+ */
+static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+{
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ sock_set_flag(sk, SOCK_DONE);
+ __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk, 0);
+}
+/*
+ * dccp_rcv_close - handle a received Close packet: answer with a Reset
+ * (code Closed), queue the skb through dccp_fin(), move the socket to
+ * DCCP_CLOSED and signal POLL_HUP.  The skb is consumed (queued).
+ */
+static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
+{
+ dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+ dccp_fin(sk, skb);
+ dccp_set_state(sk, DCCP_CLOSED);
+ sk_wake_async(sk, 1, POLL_HUP);
+}
+
+/*
+ * dccp_rcv_closereq - handle a received CloseReq.
+ * A client answers by entering DCCP_CLOSING and sending a Close; any other
+ * role replies with a Sync.  The skb is left for the caller to free.
+ */
+static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
+{
+	if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT) {
+		dccp_set_state(sk, DCCP_CLOSING);
+		dccp_send_close(sk, 0);
+		return;
+	}
+
+	/*
+	 * Step 7: Check for unexpected packet types
+	 *      If (S.is_server and P.type == CloseReq)
+	 *	  Send Sync packet acknowledging P.seqno
+	 *	  Drop packet and return
+	 */
+	dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
+}
+
+/*
+ * dccp_event_ack_recv - pass the packet's ack number to the ack vector
+ * bookkeeping; does nothing unless sending ack vectors is enabled.
+ */
+static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	if (!dp->dccps_options.dccpo_send_ack_vector)
+		return;
+
+	dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
+				     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+}
+/*
+ * dccp_check_seqno - sequence number validity checks (Steps 5 and 6 below).
+ *
+ * Returns 0 when the packet is acceptable; GSR/SWL/SWH have then been
+ * updated, and so has GAR when the packet carries an ack number and is not
+ * a Sync.  Returns -1 when the caller must drop the packet; on a Step 6
+ * failure a Sync acknowledging P.seqno has already been sent.
+ */
+static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
+{
+ const struct dccp_hdr *dh = dccp_hdr(skb);
+ struct dccp_sock *dp = dccp_sk(sk);
+ u64 lswl, lawl;
+
+ /*
+ * Step 5: Prepare sequence numbers for Sync
+ * If P.type == Sync or P.type == SyncAck,
+ * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
+ * / * P is valid, so update sequence number variables
+ * accordingly. After this update, P will pass the tests
+ * in Step 6. A SyncAck is generated if necessary in
+ * Step 15 * /
+ * Update S.GSR, S.SWL, S.SWH
+ * Otherwise,
+ * Drop packet and return
+ */
+ if (dh->dccph_type == DCCP_PKT_SYNC ||
+ dh->dccph_type == DCCP_PKT_SYNCACK) {
+ if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ dp->dccps_awl, dp->dccps_awh) &&
+ !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+ dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+ else
+ return -1; /* Step 5 failure: dropped without sending a Sync */
+ }
+
+ /*
+ * Step 6: Check sequence numbers
+ * Let LSWL = S.SWL and LAWL = S.AWL
+ * If P.type == CloseReq or P.type == Close or P.type == Reset,
+ * LSWL := S.GSR + 1, LAWL := S.GAR
+ * If LSWL <= P.seqno <= S.SWH
+ * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
+ * Update S.GSR, S.SWL, S.SWH
+ * If P.type != Sync,
+ * Update S.GAR
+ * Otherwise,
+ * Send Sync packet acknowledging P.seqno
+ * Drop packet and return
+ */
+ lswl = dp->dccps_swl;
+ lawl = dp->dccps_awl;
+
+ if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
+ dh->dccph_type == DCCP_PKT_CLOSE ||
+ dh->dccph_type == DCCP_PKT_RESET) {
+ lswl = dp->dccps_gsr;
+ dccp_inc_seqno(&lswl); /* LSWL := S.GSR + 1, with 48-bit wrap */
+ lawl = dp->dccps_gar;
+ }
+
+ if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
+ (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
+ between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ lawl, dp->dccps_awh))) {
+ dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+
+ if (dh->dccph_type != DCCP_PKT_SYNC &&
+ (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
+ DCCP_PKT_WITHOUT_ACK_SEQ))
+ dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
+ } else {
+ LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
+ "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
+ "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
+ "sending SYNC...\n",
+ dccp_packet_name(dh->dccph_type),
+ (unsigned long long) lswl,
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_seq,
+ (unsigned long long) dp->dccps_swh,
+ (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
+ DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
+ (unsigned long long) lawl,
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long) dp->dccps_awh);
+ dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
+ return -1;
+ }
+
+ return 0;
+}
+/*
+ * dccp_rcv_established - receive path for a socket with an established
+ * connection.
+ *
+ * Checks the sequence numbers, parses the options, records the packet in
+ * the ack vector buffer (when ack vectors are enabled), hands it to both
+ * CCID halves and finally dispatches on the packet type.  The skb is always
+ * consumed: it is either queued on sk_receive_queue or freed.  Always
+ * returns 0.  The len argument is not used.
+ *
+ * NOTE(review): DCCP_PKT_SYNCACK has no case in the switch, so such packets
+ * end up incrementing DCCP_MIB_INERRS - confirm that this is intended.
+ */
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned len)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ if (dccp_check_seqno(sk, skb))
+ goto discard;
+
+ if (dccp_parse_options(sk, skb))
+ goto discard;
+
+ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+ dccp_event_ack_recv(sk, skb);
+
+ /*
+ * FIXME: check ECN to see if we should use
+ * DCCP_ACKPKTS_STATE_ECN_MARKED
+ */
+ if (dp->dccps_options.dccpo_send_ack_vector) {
+ struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+
+ if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+ DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_ACKPKTS_STATE_RECEIVED)) {
+ LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
+ "packets buffer full!\n");
+ ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; /* out-of-range marker,
+  * tested for in dccp_rcv_state_process() */
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MIN,
+ DCCP_RTO_MAX);
+ goto discard;
+ }
+
+ /*
+ * FIXME: this activation is probably wrong, have to study more
+ * TCP delack machinery and how it fits into DCCP draft, but
+ * for now it kinda "works" 8)
+ */
+ if (!inet_csk_ack_scheduled(sk)) {
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
+ DCCP_RTO_MAX);
+ }
+ }
+
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+ switch (dccp_hdr(skb)->dccph_type) {
+ case DCCP_PKT_DATAACK:
+ case DCCP_PKT_DATA:
+ /*
+ * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
+ * option if it is.
+ */
+ __skb_pull(skb, dh->dccph_doff * 4);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk, 0);
+ return 0;
+ case DCCP_PKT_ACK:
+ goto discard;
+ case DCCP_PKT_RESET:
+ /*
+ * Step 9: Process Reset
+ * If P.type == Reset,
+ * Tear down connection
+ * S.state := TIMEWAIT
+ * Set TIMEWAIT timer
+ * Drop packet and return
+ */
+ dccp_fin(sk, skb);
+ dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+ return 0;
+ case DCCP_PKT_CLOSEREQ:
+ dccp_rcv_closereq(sk, skb);
+ goto discard;
+ case DCCP_PKT_CLOSE:
+ dccp_rcv_close(sk, skb);
+ return 0;
+ case DCCP_PKT_REQUEST:
+ /* Step 7
+ * or (S.is_server and P.type == Response)
+ * or (S.is_client and P.type == Request)
+ * or (S.state >= OPEN and P.type == Request
+ * and P.seqno >= S.OSR)
+ * or (S.state >= OPEN and P.type == Response
+ * and P.seqno >= S.OSR)
+ * or (S.state == RESPOND and P.type == Data),
+ * Send Sync packet acknowledging P.seqno
+ * Drop packet and return
+ */
+ if (dp->dccps_role != DCCP_ROLE_LISTEN)
+ goto send_sync;
+ goto check_seq;
+ case DCCP_PKT_RESPONSE:
+ if (dp->dccps_role != DCCP_ROLE_CLIENT)
+ goto send_sync;
+check_seq:
+ if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+send_sync:
+ dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_PKT_SYNC);
+ }
+ break; /* counted in DCCP_MIB_INERRS and freed below */
+ case DCCP_PKT_SYNC:
+ dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_PKT_SYNCACK);
+ /*
+ * From the draft:
+ *
+ * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
+ * MAY have non-zero-length application data areas, whose
+ * contents receivers MUST ignore.
+ */
+ goto discard;
+ }
+
+ DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+discard:
+ __kfree_skb(skb);
+ return 0;
+}
+/*
+ * dccp_rcv_request_sent_state_process - handle a packet received while the
+ * socket is in DCCP_REQUESTING.
+ *
+ * Return values, as consumed by dccp_rcv_state_process():
+ *    0 - Response accepted, socket moved to DCCP_PARTOPEN, skb freed here;
+ *   -1 - Response accepted and an Ack sent; the caller frees the skb;
+ *    1 - anything else (not a Response, P.ackno outside [AWL, AWH], or CCID
+ *        initialisation failed); passed on unchanged by the caller.
+ *
+ * NOTE(review): the REQUEST timer is stopped and sk_send_head is freed
+ * before P.ackno is validated - confirm that is fine on the invalid path.
+ */
+static int dccp_rcv_request_sent_state_process(struct sock *sk,
+ struct sk_buff *skb,
+ const struct dccp_hdr *dh,
+ const unsigned len)
+{
+ /*
+ * Step 4: Prepare sequence numbers in REQUEST
+ * If S.state == REQUEST,
+ * If (P.type == Response or P.type == Reset)
+ * and S.AWL <= P.ackno <= S.AWH,
+ * / * Set sequence number variables corresponding to the
+ * other endpoint, so P will pass the tests in Step 6 * /
+ * Set S.GSR, S.ISR, S.SWL, S.SWH
+ * / * Response processing continues in Step 10; Reset
+ * processing continues in Step 9 * /
+ */
+ if (dh->dccph_type == DCCP_PKT_RESPONSE) {
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ /* Stop the REQUEST timer */
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+ BUG_TRAP(sk->sk_send_head != NULL);
+ __kfree_skb(sk->sk_send_head);
+ sk->sk_send_head = NULL;
+
+ if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ dp->dccps_awl, dp->dccps_awh)) {
+ dccp_pr_debug("invalid ackno: S.AWL=%llu, "
+ "P.ackno=%llu, S.AWH=%llu \n",
+ (unsigned long long)dp->dccps_awl,
+ (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long)dp->dccps_awh);
+ goto out_invalid_packet;
+ }
+
+ dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+ dccp_update_gsr(sk, dp->dccps_isr);
+ /*
+ * SWL and AWL are initially adjusted so that they are not less than
+ * the initial Sequence Numbers received and sent, respectively:
+ * SWL := max(GSR + 1 - floor(W/4), ISR),
+ * AWL := max(GSS - W' + 1, ISS).
+ * These adjustments MUST be applied only at the beginning of the
+ * connection.
+ *
+ * AWL was adjusted in dccp_v4_connect -acme
+ */
+ dccp_set_seqno(&dp->dccps_swl,
+ max48(dp->dccps_swl, dp->dccps_isr));
+
+ if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
+ ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
+ ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+ /* FIXME: send appropriate RESET code */
+ goto out_invalid_packet;
+ }
+
+ dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
+
+ /*
+ * Step 10: Process REQUEST state (second part)
+ * If S.state == REQUEST,
+ * / * If we get here, P is a valid Response from the
+ * server (see Step 4), and we should move to
+ * PARTOPEN state. PARTOPEN means send an Ack,
+ * don't send Data packets, retransmit Acks
+ * periodically, and always include any Init Cookie
+ * from the Response * /
+ * S.state := PARTOPEN
+ * Set PARTOPEN timer
+ * Continue with S.state == PARTOPEN
+ * / * Step 12 will send the Ack completing the
+ * three-way handshake * /
+ */
+ dccp_set_state(sk, DCCP_PARTOPEN);
+
+ /* Make sure socket is routed, for correct metrics. */
+ inet_sk_rebuild_header(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, 0, POLL_OUT);
+ }
+
+ if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+ icsk->icsk_accept_queue.rskq_defer_accept) {
+ /* Save one ACK. Data will be ready after
+ * several ticks, if write_pending is set.
+ *
+ * It may be deleted, but with this feature tcpdumps
+ * look so _wonderfully_ clever, that I was not able
+ * to stand against the temptation 8) --ANK
+ */
+ /*
+ * OK, in DCCP we can as well do a similar trick, its
+ * even in the draft, but there is no need for us to
+ * schedule an ack here, as dccp_sendmsg does this for
+ * us, also stated in the draft. -acme
+ */
+ __kfree_skb(skb);
+ return 0;
+ }
+ dccp_send_ack(sk);
+ return -1; /* skb not freed here: the caller does it */
+ }
+
+out_invalid_packet:
+ return 1; /* dccp_v4_do_rcv will send a reset, but...
+ FIXME: the reset code should be
+ DCCP_RESET_CODE_PACKET_ERROR */
+}
+
+/*
+ * dccp_rcv_respond_partopen_state_process - complete the handshake when an
+ * Ack or DataAck arrives in DCCP_RESPOND or DCCP_PARTOPEN.
+ *
+ * Called from dccp_rcv_state_process() after that function has already
+ * checked the sequence numbers, parsed the options, updated the ack vector
+ * and run both CCID receive hooks for this packet.
+ *
+ * Returns 1 when the skb was queued on sk_receive_queue (the caller must
+ * not free it) and 0 otherwise.  The len argument is not used.
+ */
+static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
+						   struct sk_buff *skb,
+						   const struct dccp_hdr *dh,
+						   const unsigned len)
+{
+	int queued = 0;
+
+	switch (dh->dccph_type) {
+	case DCCP_PKT_RESET:
+		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+		break;
+	case DCCP_PKT_DATAACK:
+	case DCCP_PKT_ACK:
+		/*
+		 * FIXME: we should be resetting the PARTOPEN (DELACK) timer
+		 * here but only if we haven't used the DELACK timer for
+		 * something else, like sending a delayed ack for a TIMESTAMP
+		 * echo, etc, for now we're not clearing it, sending an extra
+		 * ACK when there is nothing else to do in DELACK is not a big
+		 * deal after all.
+		 */
+
+		/* Stop the PARTOPEN timer */
+		if (sk->sk_state == DCCP_PARTOPEN)
+			inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+
+		dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
+		dccp_set_state(sk, DCCP_OPEN);
+
+		if (dh->dccph_type == DCCP_PKT_DATAACK) {
+			/*
+			 * Deliver the payload directly.  Going through
+			 * dccp_rcv_established() would repeat the sequence
+			 * check, option parsing, ack vector update and CCID
+			 * hooks the caller has already performed, so the
+			 * packet would be accounted for twice.
+			 */
+			__skb_pull(skb, dh->dccph_doff * 4);
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
+			skb_set_owner_r(skb, sk);
+			sk->sk_data_ready(sk, 0);
+			queued = 1;
+		}
+		break;
+	}
+
+	return queued;
+}
+/*
+ * dccp_rcv_state_process - per-state receive processing; DCCP_LISTEN,
+ * DCCP_REQUESTING, DCCP_RESPOND, DCCP_PARTOPEN and DCCP_CLOSED are handled
+ * explicitly below.
+ *
+ * Returns 0 when the skb has been consumed (queued or freed) and 1 when the
+ * packet was not accepted; in that case the skb is not freed here and,
+ * according to the comments below, dccp_v4_do_rcv sends a Reset.
+ */
+int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ struct dccp_hdr *dh, unsigned len)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ const int old_state = sk->sk_state;
+ int queued = 0;
+
+ /*
+ * Step 3: Process LISTEN state
+ * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
+ *
+ * If S.state == LISTEN,
+ * If P.type == Request or P contains a valid Init Cookie
+ * option,
+ * * Must scan the packet's options to check for an Init
+ * Cookie. Only the Init Cookie is processed here,
+ * however; other options are processed in Step 8. This
+ * scan need only be performed if the endpoint uses Init
+ * Cookies *
+ * * Generate a new socket and switch to that socket *
+ * Set S := new socket for this port pair
+ * S.state = RESPOND
+ * Choose S.ISS (initial seqno) or set from Init Cookie
+ * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+ * Continue with S.state == RESPOND
+ * * A Response packet will be generated in Step 11 *
+ * Otherwise,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ *
+ * NOTE: the check for the packet types is done right here, in this
+ * function
+ */
+ if (sk->sk_state == DCCP_LISTEN) {
+ if (dh->dccph_type == DCCP_PKT_REQUEST) {
+ if (dccp_v4_conn_request(sk, skb) < 0)
+ return 1;
+
+ /* FIXME: do congestion control initialization */
+ goto discard;
+ }
+ if (dh->dccph_type == DCCP_PKT_RESET)
+ goto discard;
+
+ /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
+ return 1;
+ }
+
+ if (sk->sk_state != DCCP_REQUESTING) {
+ if (dccp_check_seqno(sk, skb))
+ goto discard;
+
+ /*
+ * Step 8: Process options and mark acknowledgeable
+ */
+ if (dccp_parse_options(sk, skb))
+ goto discard;
+
+ if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
+ DCCP_PKT_WITHOUT_ACK_SEQ)
+ dccp_event_ack_recv(sk, skb);
+
+ ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+ ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+ /*
+ * FIXME: check ECN to see if we should use
+ * DCCP_ACKPKTS_STATE_ECN_MARKED
+ */
+ if (dp->dccps_options.dccpo_send_ack_vector) {
+ if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+ DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_ACKPKTS_STATE_RECEIVED))
+ goto discard;
+ /*
+ * FIXME: this activation is probably wrong, have to
+ * study more TCP delack machinery and how it fits into
+ * DCCP draft, but for now it kinda "works" 8)
+ */
+ if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
+ DCCP_MAX_SEQNO + 1) &&
+ !inet_csk_ack_scheduled(sk)) {
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MIN,
+ DCCP_RTO_MAX);
+ }
+ }
+ }
+
+ /*
+ * Step 9: Process Reset
+ * If P.type == Reset,
+ * Tear down connection
+ * S.state := TIMEWAIT
+ * Set TIMEWAIT timer
+ * Drop packet and return
+ */
+ if (dh->dccph_type == DCCP_PKT_RESET) {
+ /*
+ * Queue the equivalent of TCP fin so that dccp_recvmsg
+ * exits the loop
+ */
+ dccp_fin(sk, skb);
+ dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+ return 0;
+ /*
+ * Step 7: Check for unexpected packet types
+ * If (S.is_server and P.type == CloseReq)
+ * or (S.is_server and P.type == Response)
+ * or (S.is_client and P.type == Request)
+ * or (S.state == RESPOND and P.type == Data),
+ * Send Sync packet acknowledging P.seqno
+ * Drop packet and return
+ */
+ } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+ (dh->dccph_type == DCCP_PKT_RESPONSE ||
+ dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
+ (dp->dccps_role == DCCP_ROLE_CLIENT &&
+ dh->dccph_type == DCCP_PKT_REQUEST) ||
+ (sk->sk_state == DCCP_RESPOND &&
+ dh->dccph_type == DCCP_PKT_DATA)) {
+ dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
+ DCCP_PKT_SYNC);
+ goto discard;
+ } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
+ dccp_rcv_closereq(sk, skb);
+ goto discard;
+ } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
+ dccp_rcv_close(sk, skb);
+ return 0;
+ }
+
+ switch (sk->sk_state) {
+ case DCCP_CLOSED:
+ return 1;
+
+ case DCCP_REQUESTING:
+ /* FIXME: do congestion control initialization */
+
+ queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
+ if (queued >= 0)
+ return queued;
+
+ __kfree_skb(skb); /* -1: an Ack was sent and the skb is still ours to free */
+ return 0;
+
+ case DCCP_RESPOND:
+ case DCCP_PARTOPEN:
+ queued = dccp_rcv_respond_partopen_state_process(sk, skb,
+ dh, len);
+ break;
+ }
+
+ if (dh->dccph_type == DCCP_PKT_ACK ||
+ dh->dccph_type == DCCP_PKT_DATAACK) {
+ switch (old_state) {
+ case DCCP_PARTOPEN:
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, 0, POLL_OUT);
+ break;
+ }
+ }
+
+ if (!queued) {
+discard:
+ __kfree_skb(skb);
+ }
+ return 0;
+}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644
index 00000000000..3fc75dbee4b
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1356 @@
+/*
+ * net/dccp/ipv4.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+
+#include <net/icmp.h>
+#include <net/inet_hashtables.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/xfrm.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { /* hash tables used by every DCCP socket lookup and hash operation in this file */
+ .lhash_lock = RW_LOCK_UNLOCKED,
+ .lhash_users = ATOMIC_INIT(0),
+ .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
+ .portalloc_lock = SPIN_LOCK_UNLOCKED, /* taken around port_rover in dccp_v4_hash_connect() */
+ .port_rover = 1024 - 1, /* dccp_v4_hash_connect() increments the rover before trying a port */
+};
+
+EXPORT_SYMBOL_GPL(dccp_hashinfo);
+
+/*
+ * struct proto .get_port hook: passes @sk and the requested port @snum to
+ * inet_csk_get_port() together with the DCCP hash tables and returns its
+ * result unchanged.
+ */
+static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
+{
+	struct inet_hashinfo *tables = &dccp_hashinfo;
+
+	return inet_csk_get_port(tables, sk, snum);
+}
+
+/* struct proto .hash hook: inserts @sk into the DCCP hash tables. */
+static void dccp_v4_hash(struct sock *sk)
+{
+	struct inet_hashinfo *tables = &dccp_hashinfo;
+
+	inet_hash(tables, sk);
+}
+
+/* struct proto .unhash hook: removes @sk from the DCCP hash tables. */
+static void dccp_v4_unhash(struct sock *sk)
+{
+	struct inet_hashinfo *tables = &dccp_hashinfo;
+
+	inet_unhash(tables, sk);
+}
+
+/*
+ * Check that the identity (inet->rcv_saddr, lport, inet->daddr, inet->dport)
+ * is not already used by a TIME-WAIT or established socket and, if it is
+ * free, hash @sk into the established table under that identity.
+ *
+ * Must be called with local bottom halves disabled.
+ *
+ * @sk:    socket being connected; on success inet->num, inet->sport and
+ *         sk->sk_hashent are filled in and the socket is hashed
+ * @lport: candidate local port in host byte order
+ * @twp:   if not NULL, receives the TIME-WAIT socket that was replaced.
+ *         Matching TIME-WAIT sockets are never recycled here, so this is
+ *         always set to NULL for now.
+ *
+ * Returns 0 on success, -EADDRNOTAVAIL if the identity is already taken.
+ */
+static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
+				       struct inet_timewait_sock **twp)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	const u32 daddr = inet->rcv_saddr;
+	const u32 saddr = inet->daddr;
+	const int dif = sk->sk_bound_dev_if;
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
+				      dccp_hashinfo.ehash_size);
+	struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
+	const struct sock *sk2;
+	const struct hlist_node *node;
+	/* Stays NULL: a matching TIME-WAIT socket always means "not unique" */
+	struct inet_timewait_sock *tw = NULL;
+
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
+		if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+	/* Must record num and sport now. Otherwise we will see
+	 * in hash table socket with a funny identity. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hashent = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp != NULL) {
+		*twp = tw;
+		/* Only account a recycle when a TIME-WAIT socket was replaced */
+		if (tw != NULL)
+			NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw != NULL) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, &dccp_death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ *
+ * If the socket has no local port yet (inet_sk(sk)->num == 0) the range
+ * sysctl_local_port_range[0] .. sysctl_local_port_range[1] is walked,
+ * starting after dccp_hashinfo.port_rover.  A port is taken when it has no
+ * bind bucket yet (a new one is created with fastreuse = -1), or when its
+ * bucket has fastreuse < 0 and __dccp_v4_check_established() accepts it.
+ *
+ * If the socket is already bound, it is hashed directly when it is the only
+ * owner of its bind bucket; otherwise __dccp_v4_check_established() decides.
+ *
+ * Returns 0 on success, -EADDRNOTAVAIL when no port could be found, or the
+ * result of __dccp_v4_check_established() for an already bound socket.
+ */
+static int dccp_v4_hash_connect(struct sock *sk)
+{
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+ int ret;
+
+ if (snum == 0) {
+ int rover;
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1; /* number of candidate ports, bounds the search loop */
+ struct hlist_node *node;
+ struct inet_timewait_sock *tw = NULL;
+
+ local_bh_disable(); /* paired with local_bh_enable() below or at out: */
+
+ /* TODO. Actually it is not so bad idea to remove
+ * dccp_hashinfo.portalloc_lock before next submission to
+ * Linus.
+ * As soon as we touch this place at all it is time to think.
+ *
+ * Now it protects single _advisory_ variable
+ * dccp_hashinfo.port_rover, hence it is mostly useless.
+ * Code will work nicely if we just delete it, but
+ * I am afraid in contented case it will work not better or
+ * even worse: another cpu just will hit the same bucket
+ * and spin there.
+ * So some cpu salt could remove both contention and
+ * memory pingpong. Any ideas how to do this in a nice way?
+ */
+ spin_lock(&dccp_hashinfo.portalloc_lock);
+ rover = dccp_hashinfo.port_rover;
+
+ do {
+ rover++;
+ if ((rover < low) || (rover > high))
+ rover = low; /* wrap around to the start of the range */
+ head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
+ dccp_hashinfo.bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+ * because the established check is already
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+ if (tb->port == rover) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+ if (!__dccp_v4_check_established(sk,
+ rover,
+ &tw))
+ goto ok;
+ goto next_port;
+ }
+ }
+
+ tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
+ head, rover);
+ if (tb == NULL) {
+ spin_unlock(&head->lock);
+ break; /* allocation failed: give up and report -EADDRNOTAVAIL */
+ }
+ tb->fastreuse = -1;
+ goto ok;
+
+ next_port:
+ spin_unlock(&head->lock);
+ } while (--remaining > 0);
+ dccp_hashinfo.port_rover = rover;
+ spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+ local_bh_enable();
+
+ return -EADDRNOTAVAIL;
+
+ok:
+ /* All locks still held and bhs disabled */
+ dccp_hashinfo.port_rover = rover;
+ spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+ inet_bind_hash(sk, tb, rover);
+ if (sk_unhashed(sk)) { /* not yet hashed by __dccp_v4_check_established(), i.e. a fresh bucket was created */
+ inet_sk(sk)->sport = htons(rover);
+ __inet_hash(&dccp_hashinfo, sk, 0);
+ }
+ spin_unlock(&head->lock);
+
+ if (tw != NULL) {
+ inet_twsk_deschedule(tw, &dccp_death_row);
+ inet_twsk_put(tw);
+ }
+
+ ret = 0;
+ goto out; /* jumps into the else branch below to re-enable bhs */
+ }
+
+ head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
+ dccp_hashinfo.bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { /* sk is the only owner of its bind bucket */
+ __inet_hash(&dccp_hashinfo, sk, 0);
+ spin_unlock_bh(&head->lock);
+ return 0;
+ } else {
+ spin_unlock(&head->lock); /* bhs stay disabled until the local_bh_enable() at out: */
+ /* No definite answer... Walk to established hash table */
+ ret = __dccp_v4_check_established(sk, snum, NULL);
+out:
+ local_bh_enable();
+ return ret;
+ }
+}
+
+/*
+ * Active open on an IPv4 DCCP socket.
+ *
+ * Validates the destination in @uaddr, obtains a route, fills in the
+ * socket's addresses and destination port, moves to DCCP_REQUESTING, binds
+ * and hashes a local port, picks the initial sequence number and finally
+ * calls dccp_connect().
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.  Once
+ * the state was changed, a failure puts the socket back into DCCP_CLOSED.
+ */
+static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+			   int addr_len)
+{
+	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct rtable *rt;
+	u32 daddr, nexthop;
+	int err;
+
+	dp->dccps_role = DCCP_ROLE_CLIENT;
+
+	if (addr_len < sizeof(struct sockaddr_in))
+		return -EINVAL;
+
+	if (usin->sin_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	daddr = usin->sin_addr.s_addr;
+	nexthop = daddr;
+	if (inet->opt != NULL && inet->opt->srr) {
+		/* With a source route the first hop comes from the options */
+		if (daddr == 0)
+			return -EINVAL;
+		nexthop = inet->opt->faddr;
+	}
+
+	err = ip_route_connect(&rt, nexthop, inet->saddr,
+			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			       IPPROTO_DCCP,
+			       inet->sport, usin->sin_port, sk);
+	if (err < 0)
+		return err;
+
+	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+		ip_rt_put(rt);
+		return -ENETUNREACH;
+	}
+
+	/* Without a source route the destination is what the route says */
+	if (inet->opt == NULL || !inet->opt->srr)
+		daddr = rt->rt_dst;
+
+	if (inet->saddr == 0)
+		inet->saddr = rt->rt_src;
+	inet->rcv_saddr = inet->saddr;
+
+	inet->dport = usin->sin_port;
+	inet->daddr = daddr;
+
+	dp->dccps_ext_header_len = inet->opt != NULL ? inet->opt->optlen : 0;
+
+	/*
+	 * Socket identity is still unknown (sport may be zero).
+	 * However we set state to DCCP_REQUESTING and, without releasing the
+	 * socket lock, select the source port, enter ourselves into the hash
+	 * tables and complete initialization after this.
+	 */
+	dccp_set_state(sk, DCCP_REQUESTING);
+	err = dccp_v4_hash_connect(sk);
+	if (err != 0)
+		goto failure;
+
+	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+	if (err != 0)
+		goto failure;
+
+	/* OK, now commit destination to socket. */
+	sk_setup_caps(sk, &rt->u.dst);
+
+	dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
+						    inet->daddr,
+						    inet->sport,
+						    usin->sin_port);
+	dp->dccps_gar = dp->dccps_iss;
+	dccp_update_gss(sk, dp->dccps_iss);
+
+	/*
+	 * SWL and AWL are initially adjusted so that they are not less than
+	 * the initial Sequence Numbers received and sent, respectively:
+	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
+	 *	AWL := max(GSS - W' + 1, ISS).
+	 * These adjustments MUST be applied only at the beginning of the
+	 * connection.
+	 */
+	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
+
+	inet->id = dp->dccps_iss ^ jiffies;
+
+	err = dccp_connect(sk);
+	/*
+	 * rt is cleared so that the failure path below does not release the
+	 * route again -- presumably the socket holds it since sk_setup_caps();
+	 * TODO confirm.
+	 */
+	rt = NULL;
+	if (err == 0)
+		return 0;
+
+failure:
+	/*
+	 * This unhashes the socket and releases the local port, if necessary.
+	 */
+	dccp_set_state(sk, DCCP_CLOSED);
+	ip_rt_put(rt);
+	sk->sk_route_caps = 0;
+	inet->dport = 0;
+	return err;
+}
+
+/*
+ * Path MTU discovery as defined in RFC1191, run when an ICMP "fragmentation
+ * needed" error arrives for @sk.
+ *
+ * @sk:  socket the ICMP error was matched to
+ * @iph: IP header quoted in the ICMP error (not used here)
+ * @mtu: MTU value carried by the ICMP error
+ */
+static inline void dccp_do_pmtu_discovery(struct sock *sk,
+					  const struct iphdr *iph,
+					  u32 mtu)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
+	struct dst_entry *dst;
+
+	/* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
+	 * sent out by Linux are always < 576 bytes so they should go through
+	 * unfragmented).
+	 */
+	if (sk->sk_state == DCCP_LISTEN)
+		return;
+
+	/* We don't check in the dst entry whether pmtu discovery is forbidden
+	 * on this route. We just assume that no packet-too-big packets
+	 * are sent back when pmtu discovery is not active.
+	 * There is a small race when the user changes this flag in the
+	 * route, but I think that's acceptable.
+	 */
+	dst = __sk_dst_check(sk, 0);
+	if (dst == NULL)
+		return;
+
+	dst->ops->update_pmtu(dst, mtu);
+
+	/* Something is about to go wrong... Remember the soft error
+	 * in case this connection is not able to recover.
+	 */
+	if (mtu < dst_mtu(dst)) {
+		if (ip_dont_fragment(sk, dst))
+			sk->sk_err_soft = EMSGSIZE;
+	}
+
+	mtu = dst_mtu(dst);
+
+	/* Otherwise let the usual retransmit timer handle it */
+	if (inet->pmtudisc == IP_PMTUDISC_DONT ||
+	    dp->dccps_pmtu_cookie <= mtu)
+		return;
+
+	dccp_sync_mss(sk, mtu);
+
+	/*
+	 * From: draft-ietf-dccp-spec-11.txt
+	 *
+	 *	DCCP-Sync packets are the best choice for upward
+	 *	probing, since DCCP-Sync probes do not risk application
+	 *	data loss.
+	 */
+	dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
+}
+
+/*
+ * Answer @rxskb with a stand-alone DCCP-Ack sent through the control socket.
+ *
+ * The Ack swaps the ports of @rxskb, uses the acknowledgement number of
+ * @rxskb as its own sequence number and acknowledges the sequence number of
+ * @rxskb.  Nothing is sent unless the route attached to @rxskb is of type
+ * RTN_LOCAL, or if no skb can be allocated.
+ */
+static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
+{
+	int err;
+	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+				     sizeof(struct dccp_hdr_ext) +
+				     sizeof(struct dccp_hdr_ack_bits);
+	struct sk_buff *skb;
+
+	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+		return;
+
+	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, MAX_DCCP_HEADER);
+
+	skb->dst = dst_clone(rxskb->dst);
+
+	skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
+	dh = dccp_hdr(skb);
+	memset(dh, 0, dccp_hdr_ack_len);
+
+	/* Build DCCP header and checksum it. */
+	dh->dccph_type = DCCP_PKT_ACK;
+	dh->dccph_sport = rxdh->dccph_dport;
+	dh->dccph_dport = rxdh->dccph_sport;
+	dh->dccph_doff = dccp_hdr_ack_len / 4;
+	dh->dccph_x = 1;
+
+	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+			 DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+	/*
+	 * Same checksum call as dccp_v4_ctl_send_reset(); without it the
+	 * field stays zero from the memset() above and the receiver's
+	 * checksum verification rejects the packet.
+	 */
+	dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
+					      rxskb->nh.iph->daddr);
+
+	bh_lock_sock(dccp_ctl_socket->sk);
+	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+				    rxskb->nh.iph->daddr,
+				    rxskb->nh.iph->saddr, NULL);
+	bh_unlock_sock(dccp_ctl_socket->sk);
+
+	/* An Ack is an outgoing segment but not a reset: no OUTRSTS here */
+	if (err == NET_XMIT_CN || err == 0)
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+}
+
+/*
+ * request_sock_ops .send_ack hook: acknowledges @skb through
+ * dccp_v4_ctl_send_ack().  @req is part of the hook signature only and is
+ * not used.
+ */
+static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
+				   struct request_sock *req)
+{
+	struct sk_buff *rxskb = skb;
+
+	dccp_v4_ctl_send_ack(rxskb);
+}
+
+/*
+ * Build and send the DCCP-Response for the pending request @req.
+ *
+ * @sk:  listening socket
+ * @req: request sock describing the peer
+ * @dst: route to use, or NULL to look one up with inet_csk_route_req()
+ *
+ * The route is released before returning, whatever the outcome.
+ * Returns 0 on success (NET_XMIT_CN counts as success), -1 if no route or
+ * packet could be obtained, otherwise the error from the IP layer.
+ */
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+				 struct dst_entry *dst)
+{
+	const struct inet_request_sock *ireq;
+	struct sk_buff *skb;
+	int err = -1;
+
+	/* First, grab a route. */
+	if (dst == NULL) {
+		dst = inet_csk_route_req(sk, req);
+		if (dst == NULL)
+			goto out;
+	}
+
+	skb = dccp_make_response(sk, dst, req);
+	if (skb == NULL)
+		goto out;
+
+	ireq = inet_rsk(req);
+	err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+				    ireq->rmt_addr,
+				    ireq->opt);
+	if (err == NET_XMIT_CN)
+		err = 0;
+
+out:
+	dst_release(dst);
+	return err;
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some sort of error
+ * condition. @skb->data points to the IP header quoted in the ICMP error and
+ * @info is passed on as the MTU for ICMP_FRAG_NEEDED. The local variable err
+ * holds the positive errno that is reported to the socket.
+ * After adjustment header points to the first 8 bytes of the DCCP header. We
+ * need to find the appropriate port.
+ *
+ * The locking strategy used here is very "optimistic". When someone else
+ * accesses the socket the ICMP is just dropped and for some paths there is no
+ * check at all. A more general error queue to queue errors for later handling
+ * is probably better.
+ *
+ * NOTE(review): the sequence number is fetched with dccp_hdr_seq(skb) while
+ * dh is computed from skb->data plus the IP header length, and the ICMP type
+ * and code are read through skb->h -- confirm that dccp_hdr_seq() really
+ * reads the quoted DCCP header and that the 8 byte length check covers it.
+ */
+void dccp_v4_err(struct sk_buff *skb, u32 info)
+{
+ const struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
+ (iph->ihl << 2));
+ struct dccp_sock *dp;
+ struct inet_sock *inet;
+ const int type = skb->h.icmph->type;
+ const int code = skb->h.icmph->code;
+ struct sock *sk;
+ __u64 seq;
+ int err;
+
+ if (skb->len < (iph->ihl << 2) + 8) { /* need the quoted IP header plus 8 bytes of the header behind it */
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
+ }
+
+ sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
+ iph->saddr, dh->dccph_sport, inet_iif(skb));
+ if (sk == NULL) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
+ }
+
+ if (sk->sk_state == DCCP_TIME_WAIT) { /* errors for TIME_WAIT sockets are ignored */
+ inet_twsk_put((struct inet_timewait_sock *)sk);
+ return;
+ }
+
+ bh_lock_sock(sk);
+ /* If too many ICMPs get dropped on busy
+ * servers this needs to be solved differently.
+ */
+ if (sock_owned_by_user(sk))
+ NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+ if (sk->sk_state == DCCP_CLOSED)
+ goto out;
+
+ dp = dccp_sk(sk);
+ seq = dccp_hdr_seq(skb);
+ if (sk->sk_state != DCCP_LISTEN &&
+ !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
+ NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+ goto out;
+ }
+
+ switch (type) { /* map the ICMP type/code to an errno in err, or bail out */
+ case ICMP_SOURCE_QUENCH:
+ /* Just silently ignore these. */
+ goto out;
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code > NR_ICMP_UNREACH)
+ goto out;
+
+ if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+ if (!sock_owned_by_user(sk))
+ dccp_do_pmtu_discovery(sk, iph, info);
+ goto out;
+ }
+
+ err = icmp_err_convert[code].errno;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ err = EHOSTUNREACH;
+ break;
+ default:
+ goto out;
+ }
+
+ switch (sk->sk_state) {
+ struct request_sock *req , **prev;
+ case DCCP_LISTEN:
+ if (sock_owned_by_user(sk))
+ goto out;
+ req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
+ iph->daddr, iph->saddr);
+ if (!req)
+ goto out;
+
+ /*
+ * ICMPs are not backlogged, hence we cannot get an established
+ * socket here.
+ */
+ BUG_TRAP(!req->sk);
+
+ if (seq != dccp_rsk(req)->dreq_iss) {
+ NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ goto out;
+ }
+ /*
+ * Still in RESPOND, just remove it silently.
+ * There is no good way to pass the error to the newly
+ * created socket, and POSIX does not want network
+ * errors returned from accept().
+ */
+ inet_csk_reqsk_queue_drop(sk, req, prev);
+ goto out;
+
+ case DCCP_REQUESTING:
+ case DCCP_RESPOND:
+ if (!sock_owned_by_user(sk)) {
+ DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+ sk->sk_err = err;
+
+ sk->sk_error_report(sk);
+
+ dccp_done(sk);
+ } else
+ sk->sk_err_soft = err; /* socket busy: only remember the error */
+ goto out;
+ }
+
+ /* If we've already connected we will keep trying
+ * until we time out, or the user gives up.
+ *
+ * rfc1122 4.2.3.9 allows to consider as hard errors
+ * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
+ * but it is obsoleted by pmtu discovery).
+ *
+ * Note, that in modern internet, where routing is unreliable
+ * and in each dark corner broken firewalls sit, sending random
+ * errors ordered by their masters even this two messages finally lose
+ * their original sense (even Linux sends invalid PORT_UNREACHs)
+ *
+ * Now we are in compliance with RFCs.
+ * --ANK (980905)
+ */
+
+ inet = inet_sk(sk);
+ if (!sock_owned_by_user(sk) && inet->recverr) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ } else /* Only an error on timeout */
+ sk->sk_err_soft = err;
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+/*
+ * Send a DCCP-Reset carrying @code from the connected socket @sk.
+ *
+ * Returns the error of inet_sk_rebuild_header() if that fails, 0 if no reset
+ * packet could be built, otherwise the result of the IP layer with
+ * NET_XMIT_CN mapped to 0.  After the packet was handed to the IP layer the
+ * RX and TX half-connection CCIDs are told to exit.
+ */
+int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+	int err;
+
+	/*
+	 * FIXME: what if rebuild_header fails?
+	 * Should we be doing a rebuild_header here?
+	 */
+	err = inet_sk_rebuild_header(sk);
+	if (err != 0)
+		return err;
+
+	skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
+	if (skb == NULL)
+		return err;	/* err is 0 at this point */
+
+	err = ip_build_and_send_pkt(skb, sk, inet->saddr, inet->daddr, NULL);
+	if (err == NET_XMIT_CN)
+		err = 0;
+
+	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+
+	return err;
+}
+
+/*
+ * Initial sequence number for the reply to @skb: feeds the packet's
+ * destination/source addresses and ports, seen from the local side, to
+ * secure_dccp_sequence_number().  @sk is not used.
+ */
+static inline u64 dccp_v4_init_sequence(const struct sock *sk,
+					const struct sk_buff *skb)
+{
+	const struct iphdr *iph = skb->nh.iph;
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+
+	return secure_dccp_sequence_number(iph->daddr, iph->saddr,
+					   dh->dccph_dport, dh->dccph_sport);
+}
+
+/*
+ * Handle a connection request packet @skb that arrived on the listening
+ * socket @sk: allocate a request sock, record the addresses and the initial
+ * sequence numbers, answer with a Response and queue the request.
+ *
+ * Returns 0 when the request was queued and -1 when it was dropped, in which
+ * case DCCP_MIB_ATTEMPTFAILS is incremented.
+ */
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	const struct rtable *rt = (struct rtable *)skb->dst;
+	struct inet_request_sock *ireq;
+	struct dccp_request_sock *dreq;
+	struct request_sock *req;
+	/* NOTE(review): a whole dccp_sock lives on the stack here and is
+	 * handed uninitialised to dccp_openreq_init() -- confirm its use. */
+	struct dccp_sock dp;
+
+	/* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
+	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+		goto drop;
+
+	/*
+	 * TW buckets are converted to open requests without
+	 * limitations, they conserve resources and peer is
+	 * evidently real one.
+	 */
+	if (inet_csk_reqsk_queue_is_full(sk))
+		goto drop;
+
+	/*
+	 * Accept backlog is full. If we have already queued enough
+	 * of warm entries in syn queue, drop request. It is better than
+	 * clogging syn queue with openreqs with exponentially increasing
+	 * timeout.
+	 */
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+		goto drop;
+
+	req = reqsk_alloc(sk->sk_prot->rsk_prot);
+	if (req == NULL)
+		goto drop;
+
+	/* FIXME: process options */
+
+	dccp_openreq_init(req, &dp, skb);
+
+	ireq = inet_rsk(req);
+	ireq->loc_addr = skb->nh.iph->daddr;
+	ireq->rmt_addr = skb->nh.iph->saddr;
+	/* FIXME: Merge Aristeu's option parsing code when ready */
+	req->rcv_wnd = 100; /* Fake, option parsing will get the
+			       right value */
+	ireq->opt = NULL;
+
+	/*
+	 * Step 3: Process LISTEN state
+	 *
+	 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+	 *
+	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
+	 * dccp_create_openreq_child.
+	 */
+	dreq = dccp_rsk(req);
+	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
+	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+
+	/* No cached route: dccp_v4_send_response() looks one up itself */
+	if (dccp_v4_send_response(sk, req, NULL) != 0)
+		goto drop_and_free;
+
+	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+	return 0;
+
+drop_and_free:
+	/*
+	 * FIXME: should be reqsk_free after implementing req->rsk_ops
+	 */
+	__reqsk_free(req);
+drop:
+	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+	return -1;
+}
+
+/*
+ * The three way handshake has completed - we got a valid ACK or DATAACK -
+ * now create the new socket.
+ *
+ * This is the equivalent of TCP's tcp_v4_syn_recv_sock.
+ *
+ * @sk:  listening socket
+ * @skb: packet that completed the handshake
+ * @req: request sock the child is created from
+ * @dst: route for the child, or NULL to look one up
+ *
+ * Returns the new, hashed child socket, or NULL (after releasing @dst and
+ * updating the listen drop/overflow counters) when it cannot be created.
+ */
+struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+				       struct request_sock *req,
+				       struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct inet_sock *newinet;
+	struct sock *newsk;
+
+	if (sk_acceptq_is_full(sk))
+		goto exit_overflow;
+
+	if (dst == NULL) {
+		dst = inet_csk_route_req(sk, req);
+		if (dst == NULL)
+			goto exit;
+	}
+
+	newsk = dccp_create_openreq_child(sk, req, skb);
+	if (newsk == NULL)
+		goto exit;
+
+	sk_setup_caps(newsk, dst);
+
+	/* Copy the connection identity from the request to the child */
+	newinet = inet_sk(newsk);
+	newinet->daddr = ireq->rmt_addr;
+	newinet->rcv_saddr = ireq->loc_addr;
+	newinet->saddr = ireq->loc_addr;
+	/* The IP options now belong to the child socket */
+	newinet->opt = ireq->opt;
+	ireq->opt = NULL;
+	newinet->mc_index = inet_iif(skb);
+	newinet->mc_ttl = skb->nh.iph->ttl;
+	newinet->id = jiffies;
+
+	dccp_sync_mss(newsk, dst_mtu(dst));
+
+	__inet_hash(&dccp_hashinfo, newsk, 0);
+	__inet_inherit_port(&dccp_hashinfo, sk, newsk);
+
+	return newsk;
+
+exit_overflow:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+exit:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	dst_release(dst);
+	return NULL;
+}
+
+/*
+ * Find the socket that should process @skb, which arrived on the listening
+ * socket @sk.
+ *
+ * Returns the result of dccp_check_req() if a pending request matches, an
+ * established socket (returned locked with bh_lock_sock()) if one matches,
+ * NULL if the match is a TIME_WAIT socket, and @sk itself otherwise.
+ */
+static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+{
+	const struct iphdr *iph = skb->nh.iph;
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+	struct request_sock **prev;
+	struct request_sock *req;
+	struct sock *nsk;
+
+	/* Find possible connection requests. */
+	req = inet_csk_search_req(sk, &prev, dh->dccph_sport,
+				  iph->saddr, iph->daddr);
+	if (req != NULL)
+		return dccp_check_req(sk, skb, req, prev);
+
+	nsk = __inet_lookup_established(&dccp_hashinfo,
+					iph->saddr, dh->dccph_sport,
+					iph->daddr, ntohs(dh->dccph_dport),
+					inet_iif(skb));
+	if (nsk == NULL)
+		return sk;
+
+	if (nsk->sk_state == DCCP_TIME_WAIT) {
+		inet_twsk_put((struct inet_timewait_sock *)nsk);
+		return NULL;
+	}
+
+	bh_lock_sock(nsk);
+	return nsk;
+}
+
+/*
+ * Compute the DCCP checksum of @skb for the address pair @saddr/@daddr.
+ *
+ * With a checksum coverage (dccph_cscov) of zero the whole packet is
+ * summed; otherwise (dccph_cscov + dccph_x) 32 bit words are summed, capped
+ * at the packet length.  Returns the folded checksum produced by
+ * csum_tcpudp_magic().
+ */
+int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
+		     const u32 daddr)
+{
+	const struct dccp_hdr* dh = dccp_hdr(skb);
+	int checksum_len = skb->len;
+	u32 partial;
+
+	if (dh->dccph_cscov != 0) {
+		checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
+		/* Never sum past the end of the packet */
+		if (checksum_len >= skb->len)
+			checksum_len = skb->len;
+	}
+
+	partial = csum_partial((unsigned char *)dh, checksum_len, 0);
+	return csum_tcpudp_magic(saddr, daddr, checksum_len,
+				 IPPROTO_DCCP, partial);
+}
+
+/*
+ * Verify the DCCP checksum of the received packet @skb for the address pair
+ * @saddr/@daddr.
+ *
+ * The sum over the covered part of the packet, including the checksum field
+ * itself, is computed by dccp_v4_checksum() so that sender and receiver
+ * share a single coverage calculation.
+ *
+ * Returns 0 if the checksum is correct and -1 otherwise.
+ */
+static int dccp_v4_verify_checksum(struct sk_buff *skb,
+				   const u32 saddr, const u32 daddr)
+{
+	return dccp_v4_checksum(skb, saddr, daddr) == 0 ? 0 : -1;
+}
+
+/*
+ * Look up an output route for a reply to @skb on behalf of @sk: addresses
+ * and ports of the received packet are swapped and the reply is bound to the
+ * interface the packet came in on.
+ *
+ * Returns the route's dst entry, or NULL (after incrementing
+ * IPSTATS_MIB_OUTNOROUTES) if no route is found.
+ */
+static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
+					   struct sk_buff *skb)
+{
+	const struct iphdr *iph = skb->nh.iph;
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+	struct rtable *rt;
+	struct flowi fl = {
+		.oif = ((struct rtable *)skb->dst)->rt_iif,
+		.nl_u = {
+			.ip4_u = {
+				.daddr = iph->saddr,
+				.saddr = iph->daddr,
+				.tos = RT_CONN_FLAGS(sk),
+			},
+		},
+		.proto = sk->sk_protocol,
+		.uli_u = {
+			.ports = {
+				.sport = dh->dccph_dport,
+				.dport = dh->dccph_sport,
+			},
+		},
+	};
+
+	if (ip_route_output_flow(&rt, &fl, sk, 0) != 0) {
+		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		return NULL;
+	}
+
+	return &rt->u.dst;
+}
+
+/*
+ * Answer @rxskb with a DCCP-Reset sent through the control socket.  The
+ * reset code is taken from DCCP_SKB_CB(rxskb)->dccpd_reset_code.
+ *
+ * No reset is sent in response to a reset, for packets whose route is not of
+ * type RTN_LOCAL, or when no return route or skb can be obtained.
+ */
+static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+{
+	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
+				       sizeof(struct dccp_hdr_ext) +
+				       sizeof(struct dccp_hdr_reset);
+	struct dccp_hdr *rxdh = dccp_hdr(rxskb);
+	struct dccp_hdr *dh;
+	struct dst_entry *dst;
+	struct sk_buff *skb;
+	u64 seqno = 0;
+	int err;
+
+	/* Never send a reset in response to a reset. */
+	if (rxdh->dccph_type == DCCP_PKT_RESET ||
+	    ((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+		return;
+
+	dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
+	if (dst == NULL)
+		return;
+
+	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, MAX_DCCP_HEADER);
+	skb->dst = dst_clone(dst);
+
+	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
+	dh = dccp_hdr(skb);
+	memset(dh, 0, dccp_hdr_reset_len);
+
+	/* Build DCCP header and checksum it. */
+	dh->dccph_type = DCCP_PKT_RESET;
+	dh->dccph_sport = rxdh->dccph_dport;
+	dh->dccph_dport = rxdh->dccph_sport;
+	dh->dccph_doff = dccp_hdr_reset_len / 4;
+	dh->dccph_x = 1;
+	dccp_hdr_reset(skb)->dccph_reset_code =
+				DCCP_SKB_CB(rxskb)->dccpd_reset_code;
+
+	/*
+	 * See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11:
+	 * the sequence number is zero unless the received packet carried an
+	 * acknowledgement number, in which case it is that number plus one.
+	 */
+	if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
+
+	dccp_hdr_set_seq(dh, seqno);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
+			 DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+	dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
+					      rxskb->nh.iph->daddr);
+
+	bh_lock_sock(dccp_ctl_socket->sk);
+	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+				    rxskb->nh.iph->daddr,
+				    rxskb->nh.iph->saddr, NULL);
+	bh_unlock_sock(dccp_ctl_socket->sk);
+
+	if (err == 0 || err == NET_XMIT_CN) {
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+	}
+out:
+	dst_release(dst);
+}
+
+/*
+ * Process @skb for socket @sk; used both directly from dccp_v4_rcv() and as
+ * the backlog_rcv handler of dccp_v4_prot.
+ *
+ * Always returns 0.  When the state machine reports an error, a
+ * Reset(No Connection) is sent in reply and the packet is freed here.
+ */
+int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_hdr *dh = dccp_hdr(skb);
+	struct sock *nsk;
+
+	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
+		if (dccp_rcv_established(sk, skb, dh, skb->len) != 0)
+			goto reset;
+		return 0;
+	}
+
+	/*
+	 * Step 3: Process LISTEN state
+	 *	If S.state == LISTEN,
+	 *	  If P.type == Request or P contains a valid Init Cookie
+	 *		option,
+	 *	    * Must scan the packet's options to check for an Init
+	 *		Cookie. Only the Init Cookie is processed here,
+	 *		however; other options are processed in Step 8. This
+	 *		scan need only be performed if the endpoint uses Init
+	 *		Cookies *
+	 *	    * Generate a new socket and switch to that socket *
+	 *	    Set S := new socket for this port pair
+	 *	    S.state = RESPOND
+	 *	    Choose S.ISS (initial seqno) or set from Init Cookie
+	 *	    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+	 *	    Continue with S.state == RESPOND
+	 *	    * A Response packet will be generated in Step 11 *
+	 *	  Otherwise,
+	 *	    Generate Reset(No Connection) unless P.type == Reset
+	 *	    Drop packet and return
+	 *
+	 * NOTE: the check for the packet types is done in
+	 *	 dccp_rcv_state_process
+	 */
+	if (sk->sk_state == DCCP_LISTEN) {
+		nsk = dccp_v4_hnd_req(sk, skb);
+		if (nsk == NULL)
+			goto discard;
+
+		/* A different socket owns this packet: let it process it */
+		if (nsk != sk) {
+			if (dccp_child_process(sk, nsk, skb) != 0)
+				goto reset;
+			return 0;
+		}
+	}
+
+	if (dccp_rcv_state_process(sk, skb, dh, skb->len) == 0)
+		return 0;
+
+reset:
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+	dccp_v4_ctl_send_reset(skb);
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Step 1 header checks for a received packet.
+ *
+ * Returns 1 if @skb must be dropped (not addressed to this host, truncated,
+ * unknown type, bad data offset, short sequence numbers on a packet type
+ * that does not allow them, or bad checksum) and 0 if it looks sane.
+ */
+static inline int dccp_invalid_packet(struct sk_buff *skb)
+{
+	const struct dccp_hdr *dh;
+	int short_seqno_allowed;
+
+	if (skb->pkt_type != PACKET_HOST)
+		return 1;
+
+	if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
+		return 1;
+	}
+
+	dh = dccp_hdr(skb);
+
+	/* If the packet type is not understood, drop packet and return */
+	if (dh->dccph_type >= DCCP_PKT_INVALID) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
+		return 1;
+	}
+
+	/*
+	 * If P.Data Offset is too small for packet type, or too large for
+	 * packet, drop packet and return
+	 */
+	if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
+					    "too small 1\n",
+			       dh->dccph_doff);
+		return 1;
+	}
+
+	if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
+					    "too small 2\n",
+			       dh->dccph_doff);
+		return 1;
+	}
+
+	/* Fetch the header again after the second pskb_may_pull() */
+	dh = dccp_hdr(skb);
+
+	/*
+	 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
+	 * has short sequence numbers), drop packet and return
+	 */
+	short_seqno_allowed = dh->dccph_type == DCCP_PKT_DATA ||
+			      dh->dccph_type == DCCP_PKT_ACK ||
+			      dh->dccph_type == DCCP_PKT_DATAACK;
+	if (dh->dccph_x == 0 && !short_seqno_allowed) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
+					    "nor DataAck and P.X == 0\n",
+			       dccp_packet_name(dh->dccph_type));
+		return 1;
+	}
+
+	/* If the header checksum is incorrect, drop packet and return */
+	if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
+				    skb->nh.iph->daddr) < 0) {
+		LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
+					    "incorrect\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * this is called when real data arrives
+ *
+ * Receive entry point for DCCP over IPv4.  Validates the header with
+ * dccp_invalid_packet(), stores sequence number, type and acknowledgement
+ * number in DCCP_SKB_CB(skb), looks up the socket and either processes the
+ * packet with dccp_v4_do_rcv() or, if the socket is owned by the user, adds
+ * it to the socket backlog.
+ *
+ * Packets for which no socket (or only a TIME_WAIT socket) is found are
+ * answered with Reset(No Connection) unless they are Resets themselves.
+ *
+ * Returns the result of dccp_v4_do_rcv() when it was called, 0 otherwise.
+ */
+int dccp_v4_rcv(struct sk_buff *skb)
+{
+ const struct dccp_hdr *dh;
+ struct sock *sk;
+ int rc;
+
+ /* Step 1: Check header basics: */
+
+ if (dccp_invalid_packet(skb))
+ goto discard_it;
+
+ dh = dccp_hdr(skb);
+#if 0
+ /*
+ * Use something like this to simulate some DATA/DATAACK loss to test
+ * dccp_ackpkts_add, you'll get something like this on a session that
+ * sends 10 DATA/DATAACK packets:
+ *
+ * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
+ *
+ * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
+ * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
+ * with the same state
+ * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
+ *
+ * So...
+ *
+ * 281473596467422 was received
+ * 281473596467421 was not received
+ * 281473596467420 was received
+ * 281473596467419 was not received
+ * 281473596467418 was received
+ * 281473596467417 was not received
+ * 281473596467416 was received
+ * 281473596467415 was not received
+ * 281473596467414 was received
+ * 281473596467413 was received (this one was the 3way handshake
+ * RESPONSE)
+ *
+ */
+ if (dh->dccph_type == DCCP_PKT_DATA ||
+ dh->dccph_type == DCCP_PKT_DATAACK) {
+ static int discard = 0;
+
+ if (discard) {
+ discard = 0;
+ goto discard_it;
+ }
+ discard = 1;
+ }
+#endif
+ DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
+ DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
+
+ dccp_pr_debug("%8.8s "
+ "src=%u.%u.%u.%u@%-5d "
+ "dst=%u.%u.%u.%u@%-5d seq=%llu",
+ dccp_packet_name(dh->dccph_type),
+ NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
+ NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+
+ if (dccp_packet_without_ack(skb)) {
+ DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; /* marker tested again in dccp_v4_ctl_send_reset() */
+ dccp_pr_debug_cat("\n");
+ } else {
+ DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
+ dccp_pr_debug_cat(", ack=%llu\n",
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ }
+
+ /* Step 2:
+ * Look up flow ID in table and get corresponding socket */
+ sk = __inet_lookup(&dccp_hashinfo,
+ skb->nh.iph->saddr, dh->dccph_sport,
+ skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+ inet_iif(skb));
+
+ /*
+ * Step 2:
+ * If no socket ...
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ */
+ if (sk == NULL) {
+ dccp_pr_debug("failed to look up flow ID in table and "
+ "get corresponding socket\n");
+ goto no_dccp_socket;
+ }
+
+ /*
+ * Step 2:
+ * ... or S.state == TIMEWAIT,
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ */
+
+ if (sk->sk_state == DCCP_TIME_WAIT) {
+ dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
+ "do_time_wait\n");
+ goto do_time_wait;
+ }
+
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ dccp_pr_debug("xfrm4_policy_check failed\n");
+ goto discard_and_relse;
+ }
+
+ if (sk_filter(sk, skb, 0)) {
+ dccp_pr_debug("sk_filter failed\n");
+ goto discard_and_relse;
+ }
+
+ skb->dev = NULL;
+
+ bh_lock_sock(sk);
+ rc = 0;
+ if (!sock_owned_by_user(sk))
+ rc = dccp_v4_do_rcv(sk, skb);
+ else
+ sk_add_backlog(sk, skb); /* socket busy: queue the packet on its backlog instead */
+ bh_unlock_sock(sk);
+
+ sock_put(sk);
+ return rc;
+
+no_dccp_socket:
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard_it;
+ /*
+ * Step 2:
+ * Generate Reset(No Connection) unless P.type == Reset
+ * Drop packet and return
+ */
+ if (dh->dccph_type != DCCP_PKT_RESET) {
+ DCCP_SKB_CB(skb)->dccpd_reset_code =
+ DCCP_RESET_CODE_NO_CONNECTION;
+ dccp_v4_ctl_send_reset(skb);
+ }
+
+discard_it:
+ /* Discard frame. */
+ kfree_skb(skb);
+ return 0;
+
+discard_and_relse:
+ sock_put(sk);
+ goto discard_it;
+
+do_time_wait:
+ inet_twsk_put((struct inet_timewait_sock *)sk);
+ goto no_dccp_socket; /* TIME_WAIT is handled like "no socket" */
+}
+
+/*
+ * struct proto .init hook: set up the DCCP specific part of a new socket.
+ *
+ * Initialises the options, allocates the RX ack packet store when ack
+ * vectors are to be sent and, except for the very first socket initialised
+ * (presumably the control socket, going by the flag's name), the RX and TX
+ * CCID control blocks.
+ *
+ * Returns 0 on success and -ENOMEM if an allocation fails.
+ */
+static int dccp_v4_init_sock(struct sock *sk)
+{
+	static int dccp_ctl_socket_init = 1;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	dccp_options_init(&dp->dccps_options);
+
+	if (dp->dccps_options.dccpo_send_ack_vector) {
+		dp->dccps_hc_rx_ackpkts =
+			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+					   GFP_KERNEL);
+
+		if (dp->dccps_hc_rx_ackpkts == NULL)
+			return -ENOMEM;
+	}
+
+	/*
+	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
+	 * the listening (master) sock get CCID control blocks, which is not
+	 * necessary, but for now, to not mess with the test userspace apps,
+	 * lets leave it here, later the real solution is to do this in a
+	 * setsockopt(CCIDs-I-want/accept). -acme
+	 */
+	if (unlikely(dccp_ctl_socket_init)) {
+		/* First socket ever initialised: it gets no CCIDs */
+		dccp_ctl_socket_init = 0;
+	} else {
+		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+						 sk);
+		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+						 sk);
+		if (dp->dccps_hc_rx_ccid == NULL ||
+		    dp->dccps_hc_tx_ccid == NULL) {
+			/* Undo everything allocated so far */
+			ccid_exit(dp->dccps_hc_rx_ccid, sk);
+			ccid_exit(dp->dccps_hc_tx_ccid, sk);
+			dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+			dp->dccps_hc_rx_ackpkts = NULL;
+			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	dccp_init_xmit_timers(sk);
+	inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+	sk->sk_state = DCCP_CLOSED;
+	sk->sk_write_space = dccp_write_space;
+	dp->dccps_mss_cache = 536;
+	dp->dccps_role = DCCP_ROLE_UNDEFINED;
+
+	return 0;
+}
+
+/*
+ * struct proto .destroy hook: release what dccp_v4_init_sock() and the
+ * connection set up -- the packet kept for retransmission, the bind bucket
+ * reference, the ack packet store and both CCIDs.  Always returns 0.
+ */
+static int dccp_v4_destroy_sock(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *pending = sk->sk_send_head;
+
+	/*
+	 * DCCP doesn't use sk_write_queue, just sk_send_head
+	 * for retransmissions
+	 */
+	if (pending != NULL) {
+		kfree_skb(pending);
+		sk->sk_send_head = NULL;
+	}
+
+	/* Clean up a referenced DCCP bind bucket. */
+	if (inet_csk(sk)->icsk_bind_hash != NULL)
+		inet_put_port(&dccp_hashinfo, sk);
+
+	/* Shut the half-connections down before freeing the CCIDs */
+	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+
+	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+	dp->dccps_hc_rx_ackpkts = NULL;
+
+	ccid_exit(dp->dccps_hc_rx_ccid, sk);
+	ccid_exit(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+
+	return 0;
+}
+
+/* request_sock_ops .destructor hook: frees the IP options kept in @req. */
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	kfree(ireq->opt);
+}
+
+static struct request_sock_ops dccp_request_sock_ops = { /* referenced by dccp_v4_prot.rsk_prot */
+ .family = PF_INET,
+ .obj_size = sizeof(struct dccp_request_sock),
+ .rtx_syn_ack = dccp_v4_send_response, /* (re)sends the DCCP-Response for a pending request */
+ .send_ack = dccp_v4_reqsk_send_ack, /* wrapper around dccp_v4_ctl_send_ack() */
+ .destructor = dccp_v4_reqsk_destructor, /* frees the IP options stored in the request */
+ .send_reset = dccp_v4_ctl_send_reset,
+};
+
+struct proto dccp_v4_prot = { /* protocol operations for DCCP over IPv4; all dccp_v4_* handlers are defined in this file */
+ .name = "DCCP",
+ .owner = THIS_MODULE,
+ .close = dccp_close,
+ .connect = dccp_v4_connect,
+ .disconnect = dccp_disconnect,
+ .ioctl = dccp_ioctl,
+ .init = dccp_v4_init_sock,
+ .setsockopt = dccp_setsockopt,
+ .getsockopt = dccp_getsockopt,
+ .sendmsg = dccp_sendmsg,
+ .recvmsg = dccp_recvmsg,
+ .backlog_rcv = dccp_v4_do_rcv, /* same handler dccp_v4_rcv() calls directly when the socket is not owned by the user */
+ .hash = dccp_v4_hash,
+ .unhash = dccp_v4_unhash,
+ .accept = inet_csk_accept,
+ .get_port = dccp_v4_get_port,
+ .shutdown = dccp_shutdown,
+ .destroy = dccp_v4_destroy_sock,
+ .orphan_count = &dccp_orphan_count,
+ .max_header = MAX_DCCP_HEADER,
+ .obj_size = sizeof(struct dccp_sock),
+ .rsk_prot = &dccp_request_sock_ops, /* read by dccp_v4_conn_request() through sk->sk_prot->rsk_prot */
+ .twsk_obj_size = sizeof(struct inet_timewait_sock),
+};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644
index 00000000000..ce5dff4ac22
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,264 @@
+/*
+ * net/dccp/minisocks.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <net/inet_timewait_sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Timewait bookkeeping for DCCP, handed to inet_twsk_schedule() by
+ * dccp_time_wait().  The timers and the work item all carry a pointer
+ * back to this object as their argument.
+ */
+struct inet_timewait_death_row dccp_death_row = {
+ .sysctl_max_tw_buckets = NR_FILE * 2,
+ .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
+ .death_lock = SPIN_LOCK_UNLOCKED,
+ .hashinfo = &dccp_hashinfo,
+ .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
+ (unsigned long)&dccp_death_row),
+ .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
+ inet_twdr_twkill_work,
+ &dccp_death_row),
+/* Short-time timewait calendar */
+
+ .twcal_hand = -1,
+ .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
+ (unsigned long)&dccp_death_row),
+};
+
+/*
+ * Move @sk into a timewait bucket (when one can be had) and finish the
+ * full socket off with dccp_done().
+ *
+ * @state: state recorded in the timewait sock
+ * @timeo: requested timeout; raised to 3.5 * RTO if smaller, and forced
+ *	   to DCCP_TIMEWAIT_LEN when @state is DCCP_TIME_WAIT
+ */
+void dccp_time_wait(struct sock *sk, int state, int timeo)
+{
+	struct inet_timewait_sock *tw;
+
+	/* Only allocate while we are below the bucket limit. */
+	tw = dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets ?
+		inet_twsk_alloc(sk, state) : NULL;
+
+	if (tw == NULL) {
+		/*
+		 * No bucket available (limit hit or allocation failed):
+		 * close the socket non-gracefully, there are bigger
+		 * problems to worry about.
+		 */
+		LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
+					 "table overflow\n");
+	} else {
+		const struct inet_connection_sock *icsk = inet_csk(sk);
+		/* 4 * rto - rto / 2, i.e. 3.5 * RTO */
+		const int min_timeo = (icsk->icsk_rto << 2) -
+				      (icsk->icsk_rto >> 1);
+
+		/* Swap sk for tw in the hash tables. */
+		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+
+		tw->tw_timeout = DCCP_TIMEWAIT_LEN;
+
+		/* Pick the timeout the TIME_WAIT timer will fire with. */
+		if (state == DCCP_TIME_WAIT)
+			timeo = DCCP_TIMEWAIT_LEN;
+		else if (timeo < min_timeo)
+			timeo = min_timeo;
+
+		inet_twsk_schedule(tw, &dccp_death_row, timeo,
+				   DCCP_TIMEWAIT_LEN);
+		inet_twsk_put(tw);
+	}
+
+	dccp_done(sk);
+}
+
+/*
+ * Clone the listening socket @sk into a child socket for the connection
+ * described by @req and initialise the DCCP specific state of the child
+ * (role, RTO, ack vector buffer, CCID half connections, sequence
+ * numbers, timers).
+ *
+ * Returns the new socket, or NULL if the clone, the ack vector
+ * allocation or the CCID initialisation failed.  @skb is unused here.
+ */
+struct sock *dccp_create_openreq_child(struct sock *sk,
+				       const struct request_sock *req,
+				       const struct sk_buff *skb)
+{
+	/*
+	 * Step 3: Process LISTEN state
+	 *
+	 * // Generate a new socket and switch to that socket
+	 * Set S := new socket for this port pair
+	 */
+	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+
+	if (newsk != NULL) {
+		const struct dccp_request_sock *dreq = dccp_rsk(req);
+		/*
+		 * This has to be the child's connection sock: using the
+		 * parent here would reset the listener's RTO and leave the
+		 * child with whatever value was copied by the clone.
+		 */
+		struct inet_connection_sock *newicsk = inet_csk(newsk);
+		struct dccp_sock *newdp = dccp_sk(newsk);
+
+		/* Do not share the parent's ack vector buffer pointer. */
+		newdp->dccps_hc_rx_ackpkts = NULL;
+		newdp->dccps_role = DCCP_ROLE_SERVER;
+		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+
+		if (newdp->dccps_options.dccpo_send_ack_vector) {
+			newdp->dccps_hc_rx_ackpkts =
+				dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+						   GFP_ATOMIC);
+			if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
+				goto out_free;
+		}
+
+		if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
+					     newsk) != 0 ||
+			     ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
+					     newsk) != 0)) {
+			dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
+			ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
+			ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
+out_free:
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			return NULL;
+		}
+
+		/*
+		 * XXX: We're using the same CCIDs set on the parent,
+		 * i.e. sk_clone copied the master sock and left the
+		 * CCID pointers for this child, that is why we do the
+		 * __ccid_get calls.
+		 */
+		__ccid_get(newdp->dccps_hc_rx_ccid);
+		__ccid_get(newdp->dccps_hc_tx_ccid);
+
+		/*
+		 * Step 3: Process LISTEN state
+		 *
+		 *	Choose S.ISS (initial seqno) or set from Init Cookie
+		 *	Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
+		 *	Cookie
+		 */
+
+		/* See dccp_v4_conn_request */
+		newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
+
+		newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
+		dccp_update_gsr(newsk, dreq->dreq_isr);
+
+		newdp->dccps_iss = dreq->dreq_iss;
+		dccp_update_gss(newsk, dreq->dreq_iss);
+
+		/*
+		 * SWL and AWL are initially adjusted so that they are not
+		 * less than the initial Sequence Numbers received and sent,
+		 * respectively:
+		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
+		 *	AWL := max(GSS - W' + 1, ISS).
+		 * These adjustments MUST be applied only at the beginning
+		 * of the connection.
+		 */
+		dccp_set_seqno(&newdp->dccps_swl,
+			       max48(newdp->dccps_swl, newdp->dccps_isr));
+		dccp_set_seqno(&newdp->dccps_awl,
+			       max48(newdp->dccps_awl, newdp->dccps_iss));
+
+		dccp_init_xmit_timers(newsk);
+
+		DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+	}
+	return newsk;
+}
+
+/*
+ * Process an incoming packet for RESPOND sockets represented
+ * as a request_sock.
+ */
+/*
+ * Returns the newly created child socket when @skb is the Ack/DataAck
+ * that completes the handshake for @req, NULL otherwise.
+ *
+ * A REQUEST newer than the recorded ISR is answered with another
+ * RESPONSE; any other REQUEST is silently discarded.  On every other
+ * failure the request is dropped from the listener's queue and a Reset
+ * is sent (unless the offending packet is itself a Reset).
+ */
+struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct request_sock **prev)
+{
+ struct sock *child = NULL;
+
+ /* Check for retransmitted REQUEST */
+ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
+ if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
+ dccp_rsk(req)->dreq_isr)) {
+ struct dccp_request_sock *dreq = dccp_rsk(req);
+
+ dccp_pr_debug("Retransmitted REQUEST\n");
+ /* Send another RESPONSE packet */
+ dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
+ dccp_set_seqno(&dreq->dreq_isr,
+ DCCP_SKB_CB(skb)->dccpd_seq);
+ req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ }
+ /* Network Duplicate, discard packet */
+ return NULL;
+ }
+
+ /* Default reset code for the drop path below. */
+ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+
+ /* Only Ack and DataAck can complete the handshake. */
+ if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
+ dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
+ goto drop;
+
+ /* Invalid ACK */
+ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
+ dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
+ "dreq_iss=%llu\n",
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long)
+ dccp_rsk(req)->dreq_iss);
+ goto drop;
+ }
+
+ child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
+ if (child == NULL)
+ goto listen_overflow;
+
+ /* FIXME: deal with options */
+
+ /* Unlink the request, account for it, then queue it with its child. */
+ inet_csk_reqsk_queue_unlink(sk, req, prev);
+ inet_csk_reqsk_queue_removed(sk, req);
+ inet_csk_reqsk_queue_add(sk, req, child);
+out:
+ return child;
+listen_overflow:
+ dccp_pr_debug("listen_overflow!\n");
+ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
+drop:
+ if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
+ req->rsk_ops->send_reset(skb);
+
+ inet_csk_reqsk_queue_drop(sk, req, prev);
+ /* child is still NULL on this path. */
+ goto out;
+}
+
+/*
+ * Queue segment on the new socket if the new socket is active,
+ * otherwise we just shortcircuit this and continue with
+ * the new socket.
+ */
+/*
+ * Hand @skb to @child: processed right away when no user context owns
+ * the child, queued on its backlog otherwise.  The child is unlocked
+ * and its reference dropped before returning.
+ *
+ * Returns the result of dccp_rcv_state_process(), or 0 when the skb
+ * was only backlogged.
+ */
+int dccp_child_process(struct sock *parent, struct sock *child,
+		       struct sk_buff *skb)
+{
+	const int old_state = child->sk_state;
+	int rc = 0;
+
+	if (sock_owned_by_user(child)) {
+		/*
+		 * Possible because the lookup goes through the main socket
+		 * hash table, so the lock on the listening socket no longer
+		 * protects the child.
+		 */
+		sk_add_backlog(child, skb);
+	} else {
+		rc = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
+					    skb->len);
+
+		/* Child left RESPOND: wake up the parent, send SIGIO */
+		if (old_state == DCCP_RESPOND &&
+		    child->sk_state != old_state)
+			parent->sk_data_ready(parent, 0);
+	}
+
+	bh_unlock_sock(child);
+	sock_put(child);
+	return rc;
+}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644
index 00000000000..382c5894acb
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,855 @@
+/*
+ * net/dccp/options.c
+ *
+ * An implementation of the DCCP protocol
+ * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+ struct sock *sk,
+ const u64 ackno,
+ const unsigned char len,
+ const unsigned char *vector);
+
+/* Stores the default values for new connections. May be changed with sysctl. */
+static const struct dccp_options dccpo_default_values = {
+ /* Every field starts from its DCCPF_INITIAL_* constant. */
+ .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
+ .dccpo_ccid = DCCPF_INITIAL_CCID,
+ .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
+ .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
+};
+
+/* Reset @dccpo to the defaults held in dccpo_default_values. */
+void dccp_options_init(struct dccp_options *dccpo)
+{
+	*dccpo = dccpo_default_values;
+}
+
+/*
+ * Decode a big-endian (most significant byte first) value of @len bytes
+ * starting at @bf.  At most four bytes are consumed; a @len of 0 yields 0.
+ *
+ * Every byte is widened to u32 before it is shifted: shifting the
+ * int-promoted byte by 24 would overflow a signed int for bytes >= 0x80.
+ */
+static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+{
+	u32 value = 0;
+
+	if (len > 3)
+		value += (u32)*bf++ << 24;
+	if (len > 2)
+		value += (u32)*bf++ << 16;
+	if (len > 1)
+		value += (u32)*bf++ << 8;
+	if (len > 0)
+		value += *bf;
+
+	return value;
+}
+
+/*
+ * Walk the option area of the DCCP header in @skb and record what was
+ * found in dp->dccps_options_received (cleared first).
+ *
+ * Option types up to DCCPO_MAX_RESERVED are single byte options; all
+ * others carry a length byte covering type + length + value.  Options in
+ * 128..191 and 192..255 are handed to the RX and TX CCID respectively.
+ *
+ * Returns 0 on success.  On a malformed option returns -1 after bumping
+ * DCCP_MIB_INVALIDOPT and setting the skb's reset code to
+ * DCCP_RESET_CODE_OPTION_ERROR.
+ */
+int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT rx opt: " : "server rx opt: ";
+#endif
+ const struct dccp_hdr *dh = dccp_hdr(skb);
+ const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+ /* Options start right after the type specific header ... */
+ unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
+ unsigned char *opt_ptr = options;
+ /* ... and end at the data offset (counted in 32 bit words). */
+ const unsigned char *opt_end = (unsigned char *)dh +
+ (dh->dccph_doff * 4);
+ struct dccp_options_received *opt_recv = &dp->dccps_options_received;
+ unsigned char opt, len;
+ unsigned char *value;
+
+ memset(opt_recv, 0, sizeof(*opt_recv));
+
+ while (opt_ptr != opt_end) {
+ opt = *opt_ptr++;
+ len = 0;
+ value = NULL;
+
+ /* Check if this isn't a single byte option */
+ if (opt > DCCPO_MAX_RESERVED) {
+ if (opt_ptr == opt_end)
+ goto out_invalid_option;
+
+ len = *opt_ptr++;
+ /* A multi byte option needs at least one value byte here. */
+ if (len < 3)
+ goto out_invalid_option;
+ /*
+ * Remove the type and len fields, leaving
+ * just the value size
+ */
+ len -= 2;
+ value = opt_ptr;
+ opt_ptr += len;
+
+ /* The value must not run past the end of the option area. */
+ if (opt_ptr > opt_end)
+ goto out_invalid_option;
+ }
+
+ switch (opt) {
+ case DCCPO_PADDING:
+ break;
+ case DCCPO_NDP_COUNT:
+ if (len > 3)
+ goto out_invalid_option;
+
+ opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
+ dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
+ opt_recv->dccpor_ndp);
+ break;
+ case DCCPO_ACK_VECTOR_0:
+ if (len > DCCP_MAX_ACK_VECTOR_LEN)
+ goto out_invalid_option;
+
+ /* Ack vectors on pure Data packets are skipped. */
+ if (pkt_type == DCCP_PKT_DATA)
+ continue;
+
+ opt_recv->dccpor_ack_vector_len = len;
+ opt_recv->dccpor_ack_vector_idx = value - options;
+
+ dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
+ debug_prefix, len,
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ value, len);
+ /*
+ * NOTE(review): dccps_hc_rx_ackpkts is only allocated when
+ * dccpo_send_ack_vector is set - confirm it cannot be NULL here.
+ */
+ dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
+ sk,
+ DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ len, value);
+ break;
+ case DCCPO_TIMESTAMP:
+ if (len != 4)
+ goto out_invalid_option;
+
+ opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
+
+ /* Remember value and arrival time for the Timestamp Echo. */
+ dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
+ do_gettimeofday(&dp->dccps_timestamp_time);
+
+ dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
+ debug_prefix, opt_recv->dccpor_timestamp,
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ break;
+ case DCCPO_TIMESTAMP_ECHO:
+ /* 4 byte echo, optionally followed by a 2 or 4 byte elapsed time. */
+ if (len != 4 && len != 6 && len != 8)
+ goto out_invalid_option;
+
+ opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
+
+ dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
+ debug_prefix,
+ opt_recv->dccpor_timestamp_echo,
+ len + 2,
+ (unsigned long long)
+ DCCP_SKB_CB(skb)->dccpd_ack_seq);
+
+ if (len > 4) {
+ if (len == 6)
+ opt_recv->dccpor_elapsed_time =
+ ntohs(*(u16 *)(value + 4));
+ else
+ opt_recv->dccpor_elapsed_time =
+ ntohl(*(u32 *)(value + 4));
+
+ dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
+ debug_prefix,
+ opt_recv->dccpor_elapsed_time);
+ }
+ break;
+ case DCCPO_ELAPSED_TIME:
+ if (len != 2 && len != 4)
+ goto out_invalid_option;
+
+ /* Elapsed Time on pure Data packets is skipped. */
+ if (pkt_type == DCCP_PKT_DATA)
+ continue;
+
+ if (len == 2)
+ opt_recv->dccpor_elapsed_time =
+ ntohs(*(u16 *)value);
+ else
+ opt_recv->dccpor_elapsed_time =
+ ntohl(*(u32 *)value);
+
+ dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
+ opt_recv->dccpor_elapsed_time);
+ break;
+ /*
+ * From draft-ietf-dccp-spec-11.txt:
+ *
+ * Option numbers 128 through 191 are for
+ * options sent from the HC-Sender to the
+ * HC-Receiver; option numbers 192 through 255
+ * are for options sent from the HC-Receiver to
+ * the HC-Sender.
+ */
+ case 128 ... 191: {
+ const u16 idx = value - options;
+
+ if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
+ opt, len, idx,
+ value) != 0)
+ goto out_invalid_option;
+ }
+ break;
+ case 192 ... 255: {
+ const u16 idx = value - options;
+
+ if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
+ opt, len, idx,
+ value) != 0)
+ goto out_invalid_option;
+ }
+ break;
+ default:
+ pr_info("DCCP(%p): option %d(len=%d) not "
+ "implemented, ignoring\n",
+ sk, opt, len);
+ break;
+ }
+ }
+
+ return 0;
+
+out_invalid_option:
+ DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
+ pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
+ return -1;
+}
+
+/*
+ * Store the low @len bytes of @value at @to, most significant byte
+ * first.  No more than four bytes are written, none when @len is 0.
+ */
+static void dccp_encode_value_var(const u32 value, unsigned char *to,
+				  const unsigned int len)
+{
+	unsigned char *out = to;
+
+	if (len >= 4)
+		*out++ = (value >> 24) & 0xFF;
+	if (len >= 3)
+		*out++ = (value >> 16) & 0xFF;
+	if (len >= 2)
+		*out++ = (value >> 8) & 0xFF;
+	if (len >= 1)
+		*out = value & 0xFF;
+}
+
+/* Number of bytes (1, 2 or 3) used to encode the NDP count @ndp. */
+static inline int dccp_ndp_len(const int ndp)
+{
+	if (likely(ndp <= 0xFF))
+		return 1;
+	if (ndp <= 0xFFFF)
+		return 2;
+	return 3;
+}
+
+/*
+ * Prepend a generic type/length/value option to @skb.
+ *
+ * @option: option type
+ * @value:  option payload, @len bytes
+ *
+ * The option is skipped (with a rate limited debug message) when it
+ * would push the accumulated option length past DCCP_MAX_OPT_LEN.
+ */
+void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+			const unsigned char option,
+			const void *value, const unsigned char len)
+{
+	/* On the wire: type byte + length byte + value. */
+	const int optlen = len + 2;
+	unsigned char *hdr;
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + optlen > DCCP_MAX_OPT_LEN) {
+		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
+					 "%d option!\n", option);
+		return;
+	}
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += optlen;
+
+	hdr = skb_push(skb, optlen);
+	hdr[0] = option;
+	hdr[1] = optlen;
+	memcpy(hdr + 2, value, len);
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option);
+
+/*
+ * Insert an NDP Count option carrying the count accumulated before this
+ * packet, then update the counter for the packet being sent:
+ * incremented for non-data packets, reset to 0 otherwise.  Nothing is
+ * inserted when the count is 0 or the option does not fit.
+ */
+static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ /* Snapshot taken before the counter is updated below. */
+ int ndp = dp->dccps_ndp_count;
+
+ if (dccp_non_data_packet(skb))
+ ++dp->dccps_ndp_count;
+ else
+ dp->dccps_ndp_count = 0;
+
+ if (ndp > 0) {
+ unsigned char *ptr;
+ const int ndp_len = dccp_ndp_len(ndp);
+ /* type + length + value bytes */
+ const int len = ndp_len + 2;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+ return;
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+ ptr = skb_push(skb, len);
+ *ptr++ = DCCPO_NDP_COUNT;
+ *ptr++ = len;
+ dccp_encode_value_var(ndp, ptr, ndp_len);
+ }
+}
+
+/*
+ * Number of bytes used to encode @elapsed_time: 0 when there is nothing
+ * to send, 2 when it fits in 16 bits, 4 otherwise.
+ */
+static inline int dccp_elapsed_time_len(const u32 elapsed_time)
+{
+	if (elapsed_time == 0)
+		return 0;
+	if (elapsed_time <= 0xFFFF)
+		return 2;
+	return 4;
+}
+
+/*
+ * Prepend an Elapsed Time option to @skb, using a 2 byte value when
+ * @elapsed_time fits in 16 bits and a 4 byte value otherwise.  Nothing
+ * is inserted when @elapsed_time is 0 or when the option would exceed
+ * DCCP_MAX_OPT_LEN.
+ */
+void dccp_insert_option_elapsed_time(struct sock *sk,
+ struct sk_buff *skb,
+ u32 elapsed_time)
+{
+#ifdef CONFIG_IP_DCCP_DEBUG
+ struct dccp_sock *dp = dccp_sk(sk);
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT TX opt: " : "server TX opt: ";
+#endif
+ const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+ /* type + length + value bytes */
+ const int len = 2 + elapsed_time_len;
+ unsigned char *to;
+
+ if (elapsed_time_len == 0)
+ return;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+ LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
+ "insert elapsed time!\n");
+ return;
+ }
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+ to = skb_push(skb, len);
+ *to++ = DCCPO_ELAPSED_TIME;
+ *to++ = len;
+
+ /* Value goes out in network byte order; memcpy avoids alignment issues. */
+ if (elapsed_time_len == 2) {
+ const u16 var16 = htons((u16)elapsed_time);
+ memcpy(to, &var16, 2);
+ } else {
+ const u32 var32 = htonl(elapsed_time);
+ memcpy(to, &var32, 4);
+ }
+
+ dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
+ debug_prefix, elapsed_time,
+ len,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
+
+/*
+ * Prepend an Ack Vector 0 option built from the circular buffer in
+ * dp->dccps_hc_rx_ackpkts and remember what was sent in the (single)
+ * ack record.  An Elapsed Time option is inserted first when the time
+ * since the buffer was last updated is non-zero.
+ *
+ * Nothing is inserted when the option does not fit or when the previous
+ * ack record has not been cleared yet.
+ */
+static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT TX opt: " : "server TX opt: ";
+#endif
+ struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+ /* type + length + vector bytes */
+ int len = ap->dccpap_buf_vector_len + 2;
+ const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
+ unsigned char *to, *from;
+
+ /*
+ * NOTE(review): the elapsed time is inserted even when the early
+ * returns below skip the ack vector itself - confirm this is intended.
+ */
+ if (elapsed_time != 0)
+ dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+ LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
+ "insert ACK Vector!\n");
+ return;
+ }
+
+ /*
+ * XXX: now we have just one ack vector sent record, so
+ * we have to wait for it to be cleared.
+ *
+ * Of course this is not acceptable, but this is just for
+ * basic testing now.
+ */
+ if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
+ return;
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+ to = skb_push(skb, len);
+ *to++ = DCCPO_ACK_VECTOR_0;
+ *to++ = len;
+
+ /* From here on len counts only the vector bytes left to copy. */
+ len = ap->dccpap_buf_vector_len;
+ from = ap->dccpap_buf + ap->dccpap_buf_head;
+
+ /* Check if buf_head wraps */
+ if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
+ const unsigned int tailsize = (ap->dccpap_buf_len -
+ ap->dccpap_buf_head);
+
+ /* Copy up to the end of the buffer, then continue from its start. */
+ memcpy(to, from, tailsize);
+ to += tailsize;
+ len -= tailsize;
+ from = ap->dccpap_buf;
+ }
+
+ memcpy(to, from, len);
+ /*
+ * From draft-ietf-dccp-spec-11.txt:
+ *
+ * For each acknowledgement it sends, the HC-Receiver will add an
+ * acknowledgement record. ack_seqno will equal the HC-Receiver
+ * sequence number it used for the ack packet; ack_ptr will equal
+ * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
+ * equal buf_nonce.
+ *
+ * This implementation uses just one ack record for now.
+ */
+ ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+ ap->dccpap_ack_ptr = ap->dccpap_buf_head;
+ ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
+ ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
+ ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
+
+ dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+ "ack_ackno=%llu\n",
+ debug_prefix, ap->dccpap_ack_vector_len,
+ (unsigned long long) ap->dccpap_ack_seqno,
+ (unsigned long long) ap->dccpap_ack_ackno);
+}
+
+/*
+ * Prepend a Timestamp option holding the current time of day in units
+ * of 10 microseconds, truncated to 32 bits and sent in network byte
+ * order.
+ */
+void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
+{
+ struct timeval tv;
+ u32 now;
+
+ do_gettimeofday(&tv);
+ now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10;
+ /* yes this will overflow but that is the point as we want a
+ * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
+
+ now = htonl(now);
+ dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
+
+/*
+ * Prepend a Timestamp Echo option: the last timestamp received
+ * (dp->dccps_timestamp_echo) followed, when non-zero, by the time
+ * elapsed since it arrived, in units of 10 microseconds.  On success the
+ * stored echo value and arrival time are cleared so the echo is sent
+ * only once.
+ */
+static void dccp_insert_option_timestamp_echo(struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT TX opt: " : "server TX opt: ";
+#endif
+ u32 tstamp_echo;
+ const u32 elapsed_time =
+ timeval_now_delta(&dp->dccps_timestamp_time) / 10;
+ const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+ /* type + length + 4 byte echo + optional elapsed time */
+ const int len = 6 + elapsed_time_len;
+ unsigned char *to;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+ LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
+ "timestamp echo!\n");
+ return;
+ }
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+ to = skb_push(skb, len);
+ *to++ = DCCPO_TIMESTAMP_ECHO;
+ *to++ = len;
+
+ tstamp_echo = htonl(dp->dccps_timestamp_echo);
+ memcpy(to, &tstamp_echo, 4);
+ to += 4;
+
+ /* elapsed_time_len == 0 means no elapsed time field at all. */
+ if (elapsed_time_len == 2) {
+ const u16 var16 = htons((u16)elapsed_time);
+ memcpy(to, &var16, 2);
+ } else if (elapsed_time_len == 4) {
+ const u32 var32 = htonl(elapsed_time);
+ memcpy(to, &var32, 4);
+ }
+
+ dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
+ debug_prefix, dp->dccps_timestamp_echo,
+ len,
+ (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+
+ /* Echo consumed: dccp_insert_options() tests this field against 0. */
+ dp->dccps_timestamp_echo = 0;
+ dp->dccps_timestamp_time.tv_sec = 0;
+ dp->dccps_timestamp_time.tv_usec = 0;
+}
+
+/*
+ * Prepend every option that applies to @skb: NDP count, ack vector and
+ * timestamp echo (the latter two only on packets that carry an ack),
+ * then whatever the RX and TX CCIDs want to add.  Finally the option
+ * area is zero padded to a multiple of 4 bytes.
+ */
+void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
+
+ if (dp->dccps_options.dccpo_send_ndp_count)
+ dccp_insert_option_ndp(sk, skb);
+
+ if (!dccp_packet_without_ack(skb)) {
+ /*
+ * buf_ackno still at DCCP_MAX_SEQNO + 1 (the value set by
+ * dccp_ackpkts_alloc) means nothing has been recorded yet.
+ */
+ if (dp->dccps_options.dccpo_send_ack_vector &&
+ (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
+ DCCP_MAX_SEQNO + 1))
+ dccp_insert_option_ack_vector(sk, skb);
+
+ if (dp->dccps_timestamp_echo != 0)
+ dccp_insert_option_timestamp_echo(sk, skb);
+ }
+
+ ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
+ ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+
+ /* XXX: insert other options when appropriate */
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
+ /* The length of all options has to be a multiple of 4 */
+ int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
+
+ if (padding != 0) {
+ padding = 4 - padding;
+ memset(skb_push(skb, padding), 0, padding);
+ DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
+ }
+ }
+}
+
+/*
+ * Allocate an ack vector state with a circular buffer of @len bytes.
+ *
+ * The buffer starts out empty with head and tail at the last slot, and
+ * buf_ackno / ack_ackno / ack_seqno set to DCCP_MAX_SEQNO + 1, the value
+ * the rest of this file uses for "nothing recorded / nothing sent yet".
+ *
+ * Returns the new object or NULL when the allocation fails; release it
+ * with dccp_ackpkts_free().
+ */
+struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
+					const unsigned int __nocast priority)
+{
+	struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
+
+	if (ap != NULL) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+		memset(ap->dccpap_buf, 0xFF, len);
+#endif
+		ap->dccpap_buf_len = len;
+		ap->dccpap_buf_head =
+			ap->dccpap_buf_tail =
+				ap->dccpap_buf_len - 1;
+		ap->dccpap_buf_ackno =
+			ap->dccpap_ack_ackno =
+				ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+		/* Both nonces must be cleared, kmalloc memory is not zeroed. */
+		ap->dccpap_buf_nonce = ap->dccpap_ack_nonce = 0;
+		ap->dccpap_ack_ptr = 0;
+		ap->dccpap_time.tv_sec = 0;
+		ap->dccpap_time.tv_usec = 0;
+		ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
+	}
+
+	return ap;
+}
+
+/* Release an ack vector state; a NULL @ap is accepted and ignored. */
+void dccp_ackpkts_free(struct dccp_ackpkts *ap)
+{
+	if (ap == NULL)
+		return;
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+	/* Overwrite header and buffer with 0xFF before freeing. */
+	memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
+#endif
+	kfree(ap);
+}
+
+/* State bits (still in place, not shifted down) of buffer entry @index. */
+static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
+ const unsigned int index)
+{
+ return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
+}
+
+/* Run length bits of buffer entry @index; 0 stands for a single packet. */
+static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
+ const unsigned int index)
+{
+ return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
+}
+
+/*
+ * Move the buffer head back by @packets entries, store @state in the new
+ * head entry and mark the @packets - 1 entries between the new and the
+ * old head as not received (run length 0).
+ *
+ * If several packets are missing, the HC-Receiver may prefer to enter
+ * multiple bytes with run length 0, rather than a single byte with a
+ * larger run length; this simplifies table updates if one of the missing
+ * packets arrives.
+ *
+ * Returns 0 on success, -ENOBUFS if the buffer cannot hold @packets more
+ * entries.
+ */
+static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
+						  const unsigned int packets,
+						  const unsigned char state)
+{
+	unsigned int gap;
+	signed long new_head;
+
+	if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
+		return -ENOBUFS;
+
+	gap	 = packets - 1;
+	new_head = ap->dccpap_buf_head - packets;
+
+	if (new_head < 0) {
+		/* The new head wraps around the start of the buffer. */
+		if (gap > 0) {
+			/*
+			 * Gap entries below the old head, i.e. slots
+			 * 0 .. buf_head - 1 (gap + new_head + 1 == buf_head).
+			 */
+			memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
+			       gap + new_head + 1);
+			/*
+			 * What is left sits between the wrapped head and the
+			 * end of the buffer: buf_len - 1 - (new_head + buf_len)
+			 * entries.  Using -new_head here would write one byte
+			 * past the end of dccpap_buf.
+			 */
+			gap = -new_head - 1;
+		}
+		new_head += ap->dccpap_buf_len;
+	}
+
+	ap->dccpap_buf_head = new_head;
+
+	if (gap > 0)
+		memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
+		       DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
+
+	ap->dccpap_buf[ap->dccpap_buf_head] = state;
+	ap->dccpap_buf_vector_len += packets;
+	return 0;
+}
+
+/*
+ * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ *
+ * Record in @ap that the packet with sequence number @ackno was seen
+ * with the given @state.
+ *
+ * Returns 0 on success, -ENOBUFS when the circular buffer is full and
+ * -EILSEQ when @ackno is a duplicate or was already considered lost.
+ */
+int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
+{
+	/*
+	 * Check at the right places if the buffer is full, if it is, tell the
+	 * caller to start dropping packets till the HC-Sender acks our ACK
+	 * vectors, when we will free up space in dccpap_buf.
+	 *
+	 * We may well decide to do buffer compression, etc, but for now lets
+	 * just drop.
+	 *
+	 * From Appendix A:
+	 *
+	 *	Of course, the circular buffer may overflow, either when the
+	 *	HC-Sender is sending data at a very high rate, when the
+	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
+	 *	or when the HC-Sender is forgetting to acknowledge those acks
+	 *	(so the HC-Receiver is unable to clean up old state). In this
+	 *	case, the HC-Receiver should either compress the buffer (by
+	 *	increasing run lengths when possible), transfer its state to
+	 *	a larger buffer, or, as a last resort, drop all received
+	 *	packets, without processing them whatsoever, until its buffer
+	 *	shrinks again.
+	 */
+
+	/* See if this is the first ackno being inserted */
+	if (ap->dccpap_buf_vector_len == 0) {
+		ap->dccpap_buf[ap->dccpap_buf_head] = state;
+		ap->dccpap_buf_vector_len = 1;
+	} else if (after48(ackno, ap->dccpap_buf_ackno)) {
+		const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
+						   ackno);
+
+		/*
+		 * Look if the state of this packet is the same as the
+		 * previous ackno and if so if we can bump the head len.
+		 */
+		if (delta == 1 &&
+		    dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
+		    (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
+		     DCCP_ACKPKTS_LEN_MASK))
+			ap->dccpap_buf[ap->dccpap_buf_head]++;
+		else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
+			return -ENOBUFS;
+	} else {
+		/*
+		 * A.1.2.  Old Packets
+		 *
+		 *	When a packet with Sequence Number S arrives, and
+		 *	S <= buf_ackno, the HC-Receiver will scan the table
+		 *	for the byte corresponding to S. (Indexing structures
+		 *	could reduce the complexity of this scan.)
+		 */
+		u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
+		unsigned int index = ap->dccpap_buf_head;
+
+		while (1) {
+			const u8 len = dccp_ackpkts_len(ap, index);
+			/*
+			 * Named differently from the @state parameter on
+			 * purpose: shadowing it made the store below write
+			 * the old (not received) state back.
+			 */
+			const u8 cur_state = dccp_ackpkts_state(ap, index);
+			/*
+			 * valid packets not yet in dccpap_buf have a reserved
+			 * entry, with a len equal to 0.
+			 */
+			if (cur_state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
+			    len == 0 && delta == 0) { /* Found our
+							 reserved seat! */
+				dccp_pr_debug("Found %llu reserved seat!\n",
+					      (unsigned long long) ackno);
+				/* Record the state the packet arrived with. */
+				ap->dccpap_buf[index] = state;
+				goto out;
+			}
+			/* len == 0 means one packet */
+			if (delta < len + 1)
+				goto out_duplicate;
+
+			delta -= len + 1;
+			if (++index == ap->dccpap_buf_len)
+				index = 0;
+		}
+	}
+
+	/* Only reached for a new highest ackno (or the very first one). */
+	ap->dccpap_buf_ackno = ackno;
+	do_gettimeofday(&ap->dccpap_time);
+out:
+	dccp_pr_debug("");
+	dccp_ackpkts_print(ap);
+	return 0;
+
+out_duplicate:
+	/* Duplicate packet */
+	dccp_pr_debug("Received a dup or already considered lost "
+		      "packet: %llu\n", (unsigned long long) ackno);
+	return -EILSEQ;
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+/*
+ * Debug helper: print @len ack vector bytes starting at @vector as
+ * "state,runlength|" pairs.  Does nothing unless dccp_debug is set.
+ */
+void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
+ int len)
+{
+ if (!dccp_debug)
+ return;
+
+ printk("ACK vector len=%d, ackno=%llu |", len,
+ (unsigned long long) ackno);
+
+ while (len--) {
+ /* State is shifted down by 6 bits for printing. */
+ const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
+ const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
+
+ printk("%d,%d|", state, rl);
+ ++vector;
+ }
+
+ printk("\n");
+}
+
+/*
+ * Debug helper: print the current ack vector held in @ap.
+ *
+ * NOTE(review): reads dccpap_buf_vector_len bytes linearly from
+ * dccpap_buf_head without wrapping at dccpap_buf_len - confirm this
+ * cannot run past the end of the circular buffer.
+ */
+void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
+{
+ dccp_ackvector_print(ap->dccpap_buf_ackno,
+ ap->dccpap_buf + ap->dccpap_buf_head,
+ ap->dccpap_buf_vector_len);
+}
+#endif
+
+/*
+ * Forget the part of the buffer covered by the ack vector that was sent:
+ * the current vector length shrinks by the length recorded when the ack
+ * vector went out.
+ */
+static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
+{
+ /*
+ * As we're keeping track of the ack vector size
+ * (dccpap_buf_vector_len) and the sent ack vector size
+ * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
+ * keep this code here as in the future we'll implement a vector of
+ * ack records, as suggested in draft-ietf-dccp-spec-11.txt
+ * Appendix A. -acme
+ */
+#if 0
+ ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
+ if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
+ ap->dccpap_buf_tail -= ap->dccpap_buf_len;
+#endif
+ ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
+}
+
+/*
+ * Called with the acknowledgement number @ackno of a received packet:
+ * if it acknowledges exactly the packet that carried our ack vector, the
+ * ack record is thrown away and marked as free again.
+ */
+void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
+ u64 ackno)
+{
+ /* Check if we actually sent an ACK vector */
+ if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+ return;
+
+ if (ackno == ap->dccpap_ack_seqno) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+ struct dccp_sock *dp = dccp_sk(sk);
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT rx ack: " : "server rx ack: ";
+#endif
+ dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+ "ack_ackno=%llu, ACKED!\n",
+ debug_prefix, 1,
+ (unsigned long long) ap->dccpap_ack_seqno,
+ (unsigned long long) ap->dccpap_ack_ackno);
+ dccp_ackpkts_trow_away_ack_record(ap);
+ /* DCCP_MAX_SEQNO + 1 marks "no ack vector outstanding". */
+ ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+ }
+}
+
+/*
+ * Scan a received ack vector (@len bytes at @vector, the first byte
+ * describing @ackno and each following byte older sequence numbers) for
+ * the sequence number of the packet that carried our own ack vector.
+ *
+ * When a run covering it is found the ack record is released; the
+ * recorded state is thrown away as well unless the peer reports that
+ * packet as not received.
+ *
+ * @ap may be NULL: the ack vector state is only allocated when sending
+ * ack vectors is enabled, while the peer is free to send us one anyway.
+ */
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+					     struct sock *sk, u64 ackno,
+					     const unsigned char len,
+					     const unsigned char *vector)
+{
+	unsigned char i;
+
+	/* No ack vector state on this socket, nothing to match against */
+	if (ap == NULL)
+		return;
+
+	/* Check if we actually sent an ACK vector */
+	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+		return;
+	/*
+	 * We're in the receiver half connection, so if we received an ACK
+	 * vector with an ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52),
+	 * we're not interested.
+	 *
+	 * Extra explanation with example:
+	 *
+	 * if we received an ACK vector with ackno 50, it can only be acking
+	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
+	 */
+	/* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
+	if (before48(ackno, ap->dccpap_ack_seqno)) {
+		/* dccp_pr_debug_cat("yes\n"); */
+		return;
+	}
+	/* dccp_pr_debug_cat("no\n"); */
+
+	i = len;
+	while (i--) {
+		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
+		u64 ackno_end_rl;
+
+		/* This byte covers ackno_end_rl .. ackno */
+		dccp_set_seqno(&ackno_end_rl, ackno - rl);
+
+		/*
+		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
+		 * ap->dccpap_ack_seqno, ackno);
+		 */
+		if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
+			const u8 state = (*vector &
+					  DCCP_ACKPKTS_STATE_MASK) >> 6;
+			/* dccp_pr_debug_cat("yes\n"); */
+
+			if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+				struct dccp_sock *dp = dccp_sk(sk);
+				const char *debug_prefix =
+					dp->dccps_role == DCCP_ROLE_CLIENT ?
+					"CLIENT rx ack: " : "server rx ack: ";
+#endif
+				dccp_pr_debug("%sACK vector 0, len=%d, "
+					      "ack_seqno=%llu, ack_ackno=%llu, "
+					      "ACKED!\n",
+					      debug_prefix, len,
+					      (unsigned long long)
+					      ap->dccpap_ack_seqno,
+					      (unsigned long long)
+					      ap->dccpap_ack_ackno);
+				dccp_ackpkts_trow_away_ack_record(ap);
+			}
+			/*
+			 * If dccpap_ack_seqno was not received, no problem
+			 * we'll send another ACK vector.
+			 */
+			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+			break;
+		}
+		/* dccp_pr_debug_cat("no\n"); */
+
+		/* Next byte starts right below the run just examined */
+		dccp_set_seqno(&ackno, ackno_end_rl - 1);
+		++vector;
+	}
+}
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644
index 00000000000..28de157a432
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,528 @@
+/*
+ * net/dccp/output.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Clear the ICSK_TIME_DACK transmit timer of @sk. dccp_transmit_skb() calls
+ * this after building a packet that carries an acknowledgement number.
+ */
+static inline void dccp_event_ack_sent(struct sock *sk)
+{
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+}
+
+/*
+ * All SKB's seen here are completely headerless. It is our
+ * job to build the DCCP header, and pass the packet down to
+ * IP so it can do the same plus pass the packet off to the
+ * device.
+ *
+ * The packet type and the type specific fields are read from the skb's
+ * control block (DCCP_SKB_CB). Every call increments dccps_gss and uses the
+ * new value as the sequence number of the packet.
+ *
+ * Returns -ENOBUFS if @skb is NULL, 0 if ip_queue_xmit() returned 0 or
+ * NET_XMIT_CN, and the value returned by ip_queue_xmit() otherwise.
+ */
+int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+ if (likely(skb != NULL)) {
+ const struct inet_sock *inet = inet_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ struct dccp_hdr *dh;
+ /* XXX For now we're using only 48 bits sequence numbers */
+ const int dccp_header_size = sizeof(*dh) +
+ sizeof(struct dccp_hdr_ext) +
+ dccp_packet_hdr_len(dcb->dccpd_type);
+ int err, set_ack = 1;
+ /* Acknowledge dccps_gsr unless the packet type says otherwise */
+ u64 ackno = dp->dccps_gsr;
+
+ dccp_inc_seqno(&dp->dccps_gss);
+
+ switch (dcb->dccpd_type) {
+ case DCCP_PKT_DATA:
+ /* No acknowledgement number is written for DATA packets */
+ set_ack = 0;
+ break;
+ case DCCP_PKT_SYNC:
+ case DCCP_PKT_SYNCACK:
+ /*
+ * The number to acknowledge was left in dccpd_seq; it has
+ * to be read here, before dccpd_seq is overwritten below.
+ */
+ ackno = dcb->dccpd_seq;
+ break;
+ }
+
+ dcb->dccpd_seq = dp->dccps_gss;
+ /* Options go in first, the fixed header is pushed in front of them */
+ dccp_insert_options(sk, skb);
+
+ skb->h.raw = skb_push(skb, dccp_header_size);
+ dh = dccp_hdr(skb);
+ /*
+ * Data packets are not cloned as they are never retransmitted
+ */
+ if (skb_cloned(skb))
+ skb_set_owner_w(skb, sk);
+
+ /* Build DCCP header and checksum it. */
+ memset(dh, 0, dccp_header_size);
+ dh->dccph_type = dcb->dccpd_type;
+ dh->dccph_sport = inet->sport;
+ dh->dccph_dport = inet->dport;
+ /* Data offset is expressed in 32 bit words: header plus options */
+ dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
+ dh->dccph_ccval = dcb->dccpd_ccval;
+ /* XXX For now we're using only 48 bits sequence numbers */
+ dh->dccph_x = 1;
+
+ dp->dccps_awh = dp->dccps_gss;
+ dccp_hdr_set_seq(dh, dp->dccps_gss);
+ if (set_ack)
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
+
+ /* Fill in the fields that only exist for some packet types */
+ switch (dcb->dccpd_type) {
+ case DCCP_PKT_REQUEST:
+ dccp_hdr_request(skb)->dccph_req_service =
+ dcb->dccpd_service;
+ break;
+ case DCCP_PKT_RESET:
+ dccp_hdr_reset(skb)->dccph_reset_code =
+ dcb->dccpd_reset_code;
+ break;
+ }
+
+ /* Computed last, once every other header field is in place */
+ dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
+ inet->daddr);
+
+ if (set_ack)
+ dccp_event_ack_sent(sk);
+
+ DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+
+ err = ip_queue_xmit(skb, 0);
+ if (err <= 0)
+ return err;
+
+ /* NET_XMIT_CN is special. It does not guarantee,
+ * that this packet is lost. It tells that device
+ * is about to start to drop packets or already
+ * drops some packets of the same priority and
+ * invokes us to send less aggressively.
+ */
+ return err == NET_XMIT_CN ? 0 : err;
+ }
+ return -ENOBUFS;
+}
+
+/*
+ * dccp_sync_mss - recompute and cache the MSS for a path MTU
+ * @sk: socket whose cached values are refreshed
+ * @pmtu: path MTU the MSS is derived from
+ *
+ * Stores @pmtu in dccps_pmtu_cookie and the computed MSS in dccps_mss_cache,
+ * then returns the MSS.
+ */
+unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	/*
+	 * Start from the path MTU minus the fixed IPv4 and DCCP headers.
+	 *
+	 * FIXME: we really should be using the af_specific thing to support
+	 * IPv6.
+	 * mss_now = pmtu - tp->af_specific->net_header_len -
+	 *	     sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
+	 */
+	int mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
+		      sizeof(struct dccp_hdr_ext);
+
+	/* Optional transport overhead recorded on the socket */
+	mss_now -= dp->dccps_ext_header_len;
+
+	/*
+	 * FIXME: this should come from the CCID infrastructure, where, say,
+	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc. Until then
+	 * reserve a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO +
+	 * ELAPSED TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE +
+	 * padding to make it a multiple of 4.
+	 */
+	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+
+	/* Remember both the result and the MTU it was computed for */
+	dp->dccps_mss_cache = mss_now;
+	dp->dccps_pmtu_cookie = pmtu;
+
+	return mss_now;
+}
+
+/*
+ * dccp_write_space - wake up whoever waits on @sk
+ *
+ * Wakes interruptible sleepers on sk_sleep and, if the socket is writeable,
+ * sends the POLL_OUT async notification. Runs under the read side of
+ * sk_callback_lock.
+ */
+void dccp_write_space(struct sock *sk)
+{
+ read_lock(&sk->sk_callback_lock);
+
+ if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ wake_up_interruptible(sk->sk_sleep);
+ /* Should agree with poll, otherwise some programs break */
+ if (sock_writeable(sk))
+ sk_wake_async(sk, 2, POLL_OUT);
+
+ read_unlock(&sk->sk_callback_lock);
+}
+
+/**
+ * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * @sk: socket to wait for
+ * @skb: packet the CCID is asked about
+ * @timeo: for how long; reduced by the time actually spent sleeping
+ *
+ * Polls ccid_hc_tx_send_packet() and sleeps for the number of milliseconds
+ * it asks for, until it returns a value <= 0. The socket lock is dropped
+ * while sleeping.
+ *
+ * Returns the final (<= 0) value from the CCID, -EPIPE if the socket has an
+ * error or is shut down for sending, -EAGAIN if the timeout is exhausted or
+ * the requested delay does not fit into it, or sock_intr_errno() if a signal
+ * is pending.
+ */
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
+			      long *timeo)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	DEFINE_WAIT(wait);
+	long delay;
+	int rc;
+
+	while (1) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo)
+			goto do_nonblock;
+		if (signal_pending(current))
+			goto do_interrupted;
+
+		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+					    skb->len);
+		if (rc <= 0)
+			break;
+		delay = msecs_to_jiffies(rc);
+		if (delay > *timeo || delay < 0)
+			goto do_nonblock;
+
+		sk->sk_write_pending++;
+		release_sock(sk);
+		/*
+		 * schedule_timeout() returns the jiffies still left of
+		 * @delay, so the time consumed is delay minus that value.
+		 * An infinite timeout is left untouched so that
+		 * sock_intr_errno() below still recognises it.
+		 */
+		delay -= schedule_timeout(delay);
+		if (*timeo != MAX_SCHEDULE_TIMEOUT)
+			*timeo -= delay;
+		lock_sock(sk);
+		sk->sk_write_pending--;
+	}
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return rc;
+
+do_error:
+	rc = -EPIPE;
+	goto out;
+do_nonblock:
+	rc = -EAGAIN;
+	goto out;
+do_interrupted:
+	rc = sock_intr_errno(*timeo);
+	goto out;
+}
+
+/*
+ * dccp_write_xmit - ask the TX CCID for permission, then send one packet
+ * @sk: sending socket
+ * @skb: packet to send
+ * @timeo: how long we may wait for the CCID
+ *
+ * A positive answer from ccid_hc_tx_send_packet() makes us wait in
+ * dccp_wait_for_ccid(). Once the answer is 0 the packet type is chosen
+ * (DATA or DATAACK), @skb is handed to dccp_transmit_skb() and the CCID is
+ * told that a packet was sent.
+ *
+ * Returns 0 on success, otherwise the error from the CCID/wait step (in
+ * which case @skb was not passed to dccp_transmit_skb()) or the error from
+ * dccp_transmit_skb().
+ *
+ * NOTE(review): dccp_sendmsg() frees @skb on any non-zero return. If
+ * dccp_transmit_skb() has already consumed @skb when it reports an error,
+ * that is a double free - confirm who owns @skb on that path.
+ */
+int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
+{
+ const struct dccp_sock *dp = dccp_sk(sk);
+ int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+ skb->len);
+
+ if (err > 0)
+ err = dccp_wait_for_ccid(sk, skb, timeo);
+
+ if (err == 0) {
+ const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ /* Sampled now: the length is reported to the CCID after the send */
+ const int len = skb->len;
+
+ if (sk->sk_state == DCCP_PARTOPEN) {
+ /* See 8.1.5. Handshake Completion */
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ inet_csk(sk)->icsk_rto,
+ DCCP_RTO_MAX);
+ dcb->dccpd_type = DCCP_PKT_DATAACK;
+ /*
+ * FIXME: we really should have a
+ * dccps_ack_pending or use icsk.
+ */
+ } else if (inet_csk_ack_scheduled(sk) ||
+ dp->dccps_timestamp_echo != 0 ||
+ (dp->dccps_options.dccpo_send_ack_vector &&
+ ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
+ ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
+ /*
+ * Piggyback an acknowledgement when an ACK is scheduled, a
+ * timestamp echo is pending, or ack vectors are enabled and
+ * the two ackpkts fields compare as tested above.
+ */
+ dcb->dccpd_type = DCCP_PKT_DATAACK;
+ else
+ dcb->dccpd_type = DCCP_PKT_DATA;
+
+ err = dccp_transmit_skb(sk, skb);
+ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+ }
+
+ return err;
+}
+
+/*
+ * dccp_retransmit_skb - send another copy of @skb
+ *
+ * Rebuilds the socket's header/route first and fails with -EHOSTUNREACH if
+ * that does not work. The original @skb is never sent itself: a private
+ * copy is made when it is already cloned, a clone otherwise. A failed
+ * copy (NULL) is passed on to dccp_transmit_skb(), which reports it.
+ */
+int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *copy;
+
+	/* Routing failure or similar. */
+	if (inet_sk_rebuild_header(sk) != 0)
+		return -EHOSTUNREACH;
+
+	if (skb_cloned(skb))
+		copy = pskb_copy(skb, GFP_ATOMIC);
+	else
+		copy = skb_clone(skb, GFP_ATOMIC);
+
+	return dccp_transmit_skb(sk, copy);
+}
+
+/*
+ * dccp_make_response - build a RESPONSE packet for a connection request
+ * @sk: socket the buffer is charged to (sock_wmalloc) and whose source
+ * port is used
+ * @dst: route, a reference to it is attached to the skb
+ * @req: request sock providing the peer port, both addresses and the
+ * initial sequence numbers (dreq_iss / dreq_isr)
+ *
+ * Returns the finished, checksummed skb, or NULL if the allocation fails.
+ * The packet is only built here, it is not transmitted.
+ */
+struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+ struct request_sock *req)
+{
+ struct dccp_hdr *dh;
+ const int dccp_header_size = sizeof(struct dccp_hdr) +
+ sizeof(struct dccp_hdr_ext) +
+ sizeof(struct dccp_hdr_response);
+ struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
+ dccp_header_size, 1,
+ GFP_ATOMIC);
+ if (skb == NULL)
+ return NULL;
+
+ /* Reserve space for headers. */
+ skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+
+ skb->dst = dst_clone(dst);
+ skb->csum = 0;
+
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
+ DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
+ /* Options first, the fixed header is pushed in front of them below */
+ dccp_insert_options(sk, skb);
+
+ skb->h.raw = skb_push(skb, dccp_header_size);
+
+ dh = dccp_hdr(skb);
+ memset(dh, 0, dccp_header_size);
+
+ dh->dccph_sport = inet_sk(sk)->sport;
+ dh->dccph_dport = inet_rsk(req)->rmt_port;
+ /* Data offset in 32 bit words: header plus options */
+ dh->dccph_doff = (dccp_header_size +
+ DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+ dh->dccph_type = DCCP_PKT_RESPONSE;
+ dh->dccph_x = 1;
+ dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
+
+ dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
+ inet_rsk(req)->rmt_addr);
+
+ DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+ return skb;
+}
+
+/*
+ * dccp_make_reset - build a RESET packet for @sk
+ * @sk: socket providing ports, addresses and sequence state
+ * @dst: route, a reference to it is attached to the skb
+ * @code: reset code placed in the packet
+ *
+ * Increments dccps_gss and uses the new value as the sequence number;
+ * dccps_gsr is acknowledged. Returns the finished, checksummed skb, or NULL
+ * if the allocation fails (dccps_gss is not touched in that case). The
+ * packet is only built here, it is not transmitted.
+ */
+struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
+ const enum dccp_reset_codes code)
+
+{
+ struct dccp_hdr *dh;
+ struct dccp_sock *dp = dccp_sk(sk);
+ const int dccp_header_size = sizeof(struct dccp_hdr) +
+ sizeof(struct dccp_hdr_ext) +
+ sizeof(struct dccp_hdr_reset);
+ struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
+ dccp_header_size, 1,
+ GFP_ATOMIC);
+ if (skb == NULL)
+ return NULL;
+
+ /* Reserve space for headers. */
+ skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+
+ skb->dst = dst_clone(dst);
+ skb->csum = 0;
+
+ dccp_inc_seqno(&dp->dccps_gss);
+
+ DCCP_SKB_CB(skb)->dccpd_reset_code = code;
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
+ DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
+ /* Options first, the fixed header is pushed in front of them below */
+ dccp_insert_options(sk, skb);
+
+ skb->h.raw = skb_push(skb, dccp_header_size);
+
+ dh = dccp_hdr(skb);
+ memset(dh, 0, dccp_header_size);
+
+ dh->dccph_sport = inet_sk(sk)->sport;
+ dh->dccph_dport = inet_sk(sk)->dport;
+ /* Data offset in 32 bit words: header plus options */
+ dh->dccph_doff = (dccp_header_size +
+ DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+ dh->dccph_type = DCCP_PKT_RESET;
+ dh->dccph_x = 1;
+ dccp_hdr_set_seq(dh, dp->dccps_gss);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
+
+ dccp_hdr_reset(skb)->dccph_reset_code = code;
+
+ dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
+ inet_sk(sk)->daddr);
+
+ DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+ return skb;
+}
+
+/*
+ * Do all connect socket setups that can be done AF independent.
+ *
+ * Clears sk_err and SOCK_DONE, computes the MSS from the MTU of the
+ * socket's cached route and resets the retransmit counter.
+ */
+static inline void dccp_connect_init(struct sock *sk)
+{
+ struct dst_entry *dst = __sk_dst_get(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ sk->sk_err = 0;
+ sock_reset_flag(sk, SOCK_DONE);
+
+ /* NOTE(review): dst is used unchecked; presumably the caller has set up the route */
+ dccp_sync_mss(sk, dst_mtu(dst));
+
+ /*
+ * FIXME: set dp->{dccps_swh,dccps_swl}, with
+ * something like dccp_inc_seq
+ */
+
+ icsk->icsk_retransmits = 0;
+}
+
+/*
+ * dccp_connect - start the active open by sending a REQUEST
+ *
+ * The REQUEST skb itself is kept in sk_send_head and a clone of it is
+ * transmitted; the retransmit timer is then armed with icsk_rto.
+ *
+ * Returns -ENOBUFS if the skb cannot be allocated, 0 otherwise. The result
+ * of dccp_transmit_skb() (including a failed clone) is not checked here.
+ */
+int dccp_connect(struct sock *sk)
+{
+ struct sk_buff *skb;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ dccp_connect_init(sk);
+
+ skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
+ if (unlikely(skb == NULL))
+ return -ENOBUFS;
+
+ /* Reserve space for headers. */
+ skb_reserve(skb, MAX_DCCP_HEADER);
+
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
+ /* FIXME: set service to something meaningful, coming
+ * from userspace*/
+ DCCP_SKB_CB(skb)->dccpd_service = 0;
+ skb->csum = 0;
+ skb_set_owner_w(skb, sk);
+
+ BUG_TRAP(sk->sk_send_head == NULL);
+ /* Keep the original around and send a clone of it */
+ sk->sk_send_head = skb;
+ dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
+ DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
+
+ /* Timer for repeating the REQUEST until an answer. */
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ icsk->icsk_rto, DCCP_RTO_MAX);
+ return 0;
+}
+
+/*
+ * dccp_send_ack - send a pure ACK packet right now
+ *
+ * Nothing is sent on a socket in DCCP_CLOSED state. If no buffer can be
+ * allocated the ACK is scheduled on the delayed-ACK timer instead.
+ */
+void dccp_send_ack(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	/* If we have been reset, we may not send again. */
+	if (sk->sk_state == DCCP_CLOSED)
+		return;
+
+	skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+	if (skb == NULL) {
+		/* Out of memory: have the timer try again later */
+		inet_csk_schedule_ack(sk);
+		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  TCP_DELACK_MAX,
+					  DCCP_RTO_MAX);
+		return;
+	}
+
+	/* Reserve space for headers */
+	skb_reserve(skb, MAX_DCCP_HEADER);
+	skb->csum = 0;
+	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
+	skb_set_owner_w(skb, sk);
+	dccp_transmit_skb(sk, skb);
+}
+
+EXPORT_SYMBOL_GPL(dccp_send_ack);
+
+/*
+ * dccp_send_delayed_ack - schedule an ACK on the delayed-ACK timer
+ *
+ * The ACK is due in two seconds, or at the already pending timeout if that
+ * one is not later. If a timer is pending and marked as blocked, the ACK is
+ * sent immediately instead.
+ */
+void dccp_send_delayed_ack(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ /*
+ * FIXME: tune this timer. elapsed time fixes the skew, so no problem
+ * with using 2s, and active senders also piggyback the ACK into a
+ * DATAACK packet, so this is really for quiescent senders.
+ */
+ unsigned long timeout = jiffies + 2 * HZ;
+
+ /* Use new timeout only if there wasn't a older one earlier. */
+ if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
+ /* If delack timer was blocked or is about to expire,
+ * send ACK now.
+ *
+ * FIXME: check the "about to expire" part
+ */
+ if (icsk->icsk_ack.blocked) {
+ dccp_send_ack(sk);
+ return;
+ }
+
+ /* Keep the pending deadline when it is not after the new one */
+ if (!time_before(timeout, icsk->icsk_ack.timeout))
+ timeout = icsk->icsk_ack.timeout;
+ }
+ icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ icsk->icsk_ack.timeout = timeout;
+ sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
+}
+
+/*
+ * dccp_send_sync - send a SYNC or SYNCACK packet
+ * @sk: sending socket
+ * @seq: stored in the control block's dccpd_seq; dccp_transmit_skb() uses
+ * that value as the acknowledgement number for SYNC/SYNCACK
+ * @pkt_type: packet type to send
+ *
+ * Silently does nothing if no buffer can be allocated.
+ */
+void dccp_send_sync(struct sock *sk, const u64 seq,
+ const enum dccp_pkt_type pkt_type)
+{
+ /*
+ * We are not putting this on the write queue, so
+ * dccp_transmit_skb() will set the ownership to this
+ * sock.
+ *
+ * NOTE(review): ownership is actually set below with
+ * skb_set_owner_w(); the sentence above looks stale.
+ */
+ struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+
+ if (skb == NULL)
+ /* FIXME: how to make sure the sync is sent? */
+ return;
+
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(skb, MAX_DCCP_HEADER);
+ skb->csum = 0;
+ DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
+ DCCP_SKB_CB(skb)->dccpd_seq = seq;
+
+ skb_set_owner_w(skb, sk);
+ dccp_transmit_skb(sk, skb);
+}
+
+/*
+ * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
+ * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
+ * any circumstances.
+ *
+ * A client sends CLOSE, any other role sends CLOSEREQ. With @active set the
+ * allocation uses GFP_KERNEL, the packet is kept in sk_send_head and a clone
+ * is transmitted; otherwise GFP_ATOMIC is used and the packet itself is
+ * sent. Both CCID halves are shut down afterwards.
+ *
+ * NOTE(review): despite the statement above, an allocation failure makes
+ * this function return without sending anything and without calling the
+ * CCID exit hooks - confirm that this is acceptable.
+ */
+void dccp_send_close(struct sock *sk, const int active)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb;
+ const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;
+
+ skb = alloc_skb(sk->sk_prot->max_header, prio);
+ if (skb == NULL)
+ return;
+
+ /* Reserve space for headers and prepare control bits. */
+ skb_reserve(skb, sk->sk_prot->max_header);
+ skb->csum = 0;
+ DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
+
+ skb_set_owner_w(skb, sk);
+ if (active) {
+ BUG_TRAP(sk->sk_send_head == NULL);
+ /* Keep the original around and send a clone of it */
+ sk->sk_send_head = skb;
+ dccp_transmit_skb(sk, skb_clone(skb, prio));
+ } else
+ dccp_transmit_skb(sk, skb);
+
+ ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644
index 00000000000..18a0e69c9dc
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,826 @@
+/*
+ * net/dccp/proto.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <net/checksum.h>
+
+#include <net/inet_common.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+
+#include <asm/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/poll.h>
+#include <linux/dccp.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/* Per-CPU DCCP MIB counters; both entries are allocated in init_dccp_v4_mibs() */
+DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
+
+/* Orphaned socket counter, starts at zero */
+atomic_t dccp_orphan_count = ATOMIC_INIT(0);
+
+/* IPv4 receive and error handlers, registered for IPPROTO_DCCP in dccp_init() */
+static struct net_protocol dccp_protocol = {
+ .handler = dccp_v4_rcv,
+ .err_handler = dccp_v4_err,
+};
+
+/*
+ * dccp_packet_name - printable name of a DCCP packet type
+ * @type: packet type value, may be anything (e.g. taken from the wire)
+ *
+ * Returns the name of @type, or "INVALID" when @type is negative, not below
+ * DCCP_NR_PKT_TYPES, or has no entry in the table. The returned string is
+ * static and must not be modified or freed.
+ */
+const char *dccp_packet_name(const int type)
+{
+	static const char *const dccp_packet_names[] = {
+		[DCCP_PKT_REQUEST]  = "REQUEST",
+		[DCCP_PKT_RESPONSE] = "RESPONSE",
+		[DCCP_PKT_DATA]	    = "DATA",
+		[DCCP_PKT_ACK]	    = "ACK",
+		[DCCP_PKT_DATAACK]  = "DATAACK",
+		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
+		[DCCP_PKT_CLOSE]    = "CLOSE",
+		[DCCP_PKT_RESET]    = "RESET",
+		[DCCP_PKT_SYNC]	    = "SYNC",
+		[DCCP_PKT_SYNCACK]  = "SYNCACK",
+	};
+
+	/* @type is signed: a negative value must not be used as an index */
+	if (type < 0 || type >= DCCP_NR_PKT_TYPES ||
+	    dccp_packet_names[type] == NULL)
+		return "INVALID";
+
+	return dccp_packet_names[type];
+}
+
+EXPORT_SYMBOL_GPL(dccp_packet_name);
+
+/*
+ * dccp_state_name - printable name of a DCCP socket state
+ * @state: socket state value
+ *
+ * Returns the name of @state, or "INVALID STATE!" when @state is negative,
+ * not below DCCP_MAX_STATES, or has no entry in the (sparse) table. The
+ * returned string is static and must not be modified or freed.
+ */
+const char *dccp_state_name(const int state)
+{
+	static const char *const dccp_state_names[] = {
+		[DCCP_OPEN]	  = "OPEN",
+		[DCCP_REQUESTING] = "REQUESTING",
+		[DCCP_PARTOPEN]	  = "PARTOPEN",
+		[DCCP_LISTEN]	  = "LISTEN",
+		[DCCP_RESPOND]	  = "RESPOND",
+		[DCCP_CLOSING]	  = "CLOSING",
+		[DCCP_TIME_WAIT]  = "TIME_WAIT",
+		[DCCP_CLOSED]	  = "CLOSED",
+	};
+
+	/*
+	 * @state is signed, and the designated initializers above leave
+	 * holes that would otherwise be handed out as NULL.
+	 */
+	if (state < 0 || state >= DCCP_MAX_STATES ||
+	    dccp_state_names[state] == NULL)
+		return "INVALID STATE!";
+
+	return dccp_state_names[state];
+}
+
+EXPORT_SYMBOL_GPL(dccp_state_name);
+
+/*
+ * Mark @sk as a listening DCCP socket (DCCP_ROLE_LISTEN) and let the
+ * generic inet_csk code start listening. Returns whatever
+ * inet_csk_listen_start() returns.
+ */
+static inline int dccp_listen_start(struct sock *sk)
+{
+ dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
+ return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+}
+
+/*
+ * dccp_disconnect - abort the connection and bring @sk back to DCCP_CLOSED
+ * @sk: socket to reset
+ * @flags: not used
+ *
+ * Stops listening or flags ECONNRESET depending on the previous state,
+ * stops the timers, drops queued receive data and the pending send_head
+ * packet, and clears the destination port, shutdown state, backoff and
+ * cached route. Always returns 0.
+ */
+int dccp_disconnect(struct sock *sk, int flags)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int err = 0;
+ const int old_state = sk->sk_state;
+
+ if (old_state != DCCP_CLOSED)
+ dccp_set_state(sk, DCCP_CLOSED);
+
+ /* ABORT function of RFC793 */
+ if (old_state == DCCP_LISTEN) {
+ inet_csk_listen_stop(sk);
+ /* FIXME: do the active reset thing */
+ } else if (old_state == DCCP_REQUESTING)
+ sk->sk_err = ECONNRESET;
+
+ dccp_clear_xmit_timers(sk);
+ __skb_queue_purge(&sk->sk_receive_queue);
+ if (sk->sk_send_head != NULL) {
+ __kfree_skb(sk->sk_send_head);
+ sk->sk_send_head = NULL;
+ }
+
+ inet->dport = 0;
+
+ /* Keep the source address only if the user bound it explicitly */
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
+
+ sk->sk_shutdown = 0;
+ sock_reset_flag(sk, SOCK_DONE);
+
+ icsk->icsk_backoff = 0;
+ inet_csk_delack_init(sk);
+ __sk_dst_reset(sk);
+
+ BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+
+ sk->sk_error_report(sk);
+ return err;
+}
+
+/*
+ * Wait for a DCCP event.
+ *
+ * Note that we don't need to lock the socket, as the upper poll layers
+ * take care of normal races (between the test and the event) and we don't
+ * go look at any of the socket buffers directly.
+ *
+ * Returns the poll mask for @sock: POLLERR on a pending error, POLLHUP when
+ * fully shut down or closed, POLLIN/POLLRDNORM when receive memory is in use
+ * or receiving is shut down, POLLOUT/POLLWRNORM when there is enough write
+ * space. Listening sockets are handled by inet_csk_listen_poll().
+ */
+static unsigned int dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ unsigned int mask;
+ struct sock *sk = sock->sk;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ if (sk->sk_state == DCCP_LISTEN)
+ return inet_csk_listen_poll(sk);
+
+ /* Socket is not locked. We are protected from async events
+ by poll logic and correct handling of state changes
+ made by another threads is impossible in any case.
+ */
+
+ mask = 0;
+ if (sk->sk_err)
+ mask = POLLERR;
+
+ if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+ mask |= POLLHUP;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connected? */
+ if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ /* Anything charged to the receive side counts as readable */
+ if (atomic_read(&sk->sk_rmem_alloc) > 0)
+ mask |= POLLIN | POLLRDNORM;
+
+ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else { /* send SIGIO later */
+ set_bit(SOCK_ASYNC_NOSPACE,
+ &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+ /* Race breaker. If space is freed after
+ * wspace test but before the flags are set,
+ * IO signal will be lost.
+ */
+ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ mask |= POLLOUT | POLLWRNORM;
+ }
+ }
+ }
+ return mask;
+}
+
+/*
+ * dccp_ioctl - protocol ioctl handler
+ *
+ * No DCCP specific ioctl is implemented: every command is answered with
+ * -ENOIOCTLCMD after a debug message.
+ */
+int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ dccp_pr_debug("entry\n");
+ return -ENOIOCTLCMD;
+}
+
+/*
+ * dccp_setsockopt - set a DCCP socket option
+ * @sk: socket to modify
+ * @level: option level; anything but SOL_DCCP is passed to ip_setsockopt()
+ * @optname: option to set; only DCCP_SOCKOPT_PACKET_SIZE is known
+ * @optval: user pointer to an int holding the new value
+ * @optlen: size of the user buffer, at least sizeof(int)
+ *
+ * Returns 0 on success, -EINVAL if @optlen is too small, -EFAULT if the
+ * value cannot be read, -ENOPROTOOPT for an unknown option.
+ */
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	int err = 0;
+	int val;
+
+	if (level != SOL_DCCP)
+		return ip_setsockopt(sk, level, optname, optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	/* Fetch the value before taking the socket lock */
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	if (optname == DCCP_SOCKOPT_PACKET_SIZE)
+		dccp_sk(sk)->dccps_packet_size = val;
+	else
+		err = -ENOPROTOOPT;
+
+	release_sock(sk);
+	return err;
+}
+
+/*
+ * dccp_getsockopt - read a DCCP socket option
+ * @sk: socket to query
+ * @level: option level; anything but SOL_DCCP is passed to ip_getsockopt()
+ * @optname: option to read; only DCCP_SOCKOPT_PACKET_SIZE is known
+ * @optval: user buffer receiving the value
+ * @optlen: in: size of @optval, out: number of bytes written (at most
+ *	    sizeof(int))
+ *
+ * Returns 0 on success, -EINVAL for a negative length, -EFAULT if user
+ * memory cannot be accessed, -ENOPROTOOPT for an unknown option.
+ */
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	struct dccp_sock *dp;
+	int val, len;
+
+	if (level != SOL_DCCP)
+		return ip_getsockopt(sk, level, optname, optval, optlen);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	/*
+	 * The sign has to be tested before clamping: min_t() compares as
+	 * unsigned int, so a negative length would come out as sizeof(int)
+	 * and never be rejected.
+	 */
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	dp = dccp_sk(sk);
+
+	switch (optname) {
+	case DCCP_SOCKOPT_PACKET_SIZE:
+		val = dp->dccps_packet_size;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * dccp_sendmsg - send one datagram of @len bytes
+ *
+ * The whole message goes into a single packet, so anything larger than the
+ * cached MSS is refused with -EMSGSIZE. Waits for the connection to be
+ * established if necessary, copies the user data into a fresh skb and
+ * passes it to dccp_write_xmit().
+ *
+ * Returns @len on success or a negative error.
+ */
+int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ const struct dccp_sock *dp = dccp_sk(sk);
+ const int flags = msg->msg_flags;
+ const int noblock = flags & MSG_DONTWAIT;
+ struct sk_buff *skb;
+ int rc, size;
+ long timeo;
+
+ if (len > dp->dccps_mss_cache)
+ return -EMSGSIZE;
+
+ lock_sock(sk);
+ timeo = sock_sndtimeo(sk, noblock);
+
+ /*
+ * We have to use sk_stream_wait_connect here to set sk_write_pending,
+ * so that the trick in dccp_rcv_request_sent_state_process.
+ */
+ /* Wait for a connection to finish. */
+ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
+ if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
+ goto out_release;
+
+ size = sk->sk_prot->max_header + len;
+ /* The socket lock is dropped around the allocation */
+ release_sock(sk);
+ skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+ lock_sock(sk);
+ if (skb == NULL)
+ goto out_release;
+
+ skb_reserve(skb, sk->sk_prot->max_header);
+ rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+ if (rc != 0)
+ goto out_discard;
+
+ rc = dccp_write_xmit(sk, skb, &timeo);
+ /*
+ * XXX we don't use sk_write_queue, so just discard the packet.
+ * Current plan however is to _use_ sk_write_queue with
+ * an algorithm similar to tcp_sendmsg, where the main difference
+ * is that in DCCP we have to respect packet boundaries, so
+ * no coalescing of skbs.
+ *
+ * This bug was _quickly_ found & fixed by just looking at an OSTRA
+ * generated callgraph 8) -acme
+ *
+ * NOTE(review): dccp_write_xmit() also returns non-zero when
+ * dccp_transmit_skb() fails. If the skb has already been consumed
+ * on that path, the kfree_skb() below frees it a second time -
+ * confirm the ownership rules.
+ */
+ if (rc != 0)
+ goto out_discard;
+out_release:
+ release_sock(sk);
+ /* rc is 0 only after a successful send: report the bytes sent */
+ return rc ? : len;
+out_discard:
+ kfree_skb(skb);
+ goto out_release;
+}
+
+/*
+ * dccp_recvmsg - receive one datagram
+ *
+ * Looks at the head of the receive queue: DATA/DATAACK packets are copied
+ * to the user (truncated to @len, with MSG_TRUNC set in that case),
+ * RESET/CLOSE packets end the read with 0, any other packet type is dropped.
+ * With an empty queue the socket status decides between returning and
+ * sleeping in sk_wait_data().
+ *
+ * Returns the number of bytes copied, 0 at end of connection, or a negative
+ * error. Note that @len is a size_t that also carries the negative error
+ * values until they are converted by the int return.
+ */
+int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags, int *addr_len)
+{
+ const struct dccp_hdr *dh;
+ long timeo;
+
+ lock_sock(sk);
+
+ if (sk->sk_state == DCCP_LISTEN) {
+ len = -ENOTCONN;
+ goto out;
+ }
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+
+ do {
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+ if (skb == NULL)
+ goto verify_sock_status;
+
+ dh = dccp_hdr(skb);
+
+ if (dh->dccph_type == DCCP_PKT_DATA ||
+ dh->dccph_type == DCCP_PKT_DATAACK)
+ goto found_ok_skb;
+
+ if (dh->dccph_type == DCCP_PKT_RESET ||
+ dh->dccph_type == DCCP_PKT_CLOSE) {
+ dccp_pr_debug("found fin ok!\n");
+ len = 0;
+ goto found_fin_ok;
+ }
+ /* Not something the user can read: drop it and check the status */
+ dccp_pr_debug("packet_type=%s\n",
+ dccp_packet_name(dh->dccph_type));
+ sk_eat_skb(sk, skb);
+verify_sock_status:
+ if (sock_flag(sk, SOCK_DONE)) {
+ len = 0;
+ break;
+ }
+
+ if (sk->sk_err) {
+ len = sock_error(sk);
+ break;
+ }
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ len = 0;
+ break;
+ }
+
+ if (sk->sk_state == DCCP_CLOSED) {
+ if (!sock_flag(sk, SOCK_DONE)) {
+ /* This occurs when user tries to read
+ * from never connected socket.
+ */
+ len = -ENOTCONN;
+ break;
+ }
+ len = 0;
+ break;
+ }
+
+ if (!timeo) {
+ len = -EAGAIN;
+ break;
+ }
+
+ if (signal_pending(current)) {
+ len = sock_intr_errno(timeo);
+ break;
+ }
+
+ /* Nothing usable yet: sleep, then look at the queue again */
+ sk_wait_data(sk, &timeo);
+ continue;
+ found_ok_skb:
+ if (len > skb->len)
+ len = skb->len;
+ else if (len < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+
+ if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
+ /* Exception. Bailout! */
+ len = -EFAULT;
+ break;
+ }
+ found_fin_ok:
+ /* A peeked packet stays on the queue */
+ if (!(flags & MSG_PEEK))
+ sk_eat_skb(sk, skb);
+ break;
+ } while (1);
+out:
+ release_sock(sk);
+ return len;
+}
+
+/*
+ * inet_dccp_listen - listen() handler for DCCP sockets
+ * @sock: socket to put into listening state
+ * @backlog: new value for sk_max_ack_backlog
+ *
+ * Only an unconnected SOCK_DCCP socket in CLOSED or LISTEN state is
+ * accepted, everything else gets -EINVAL. Returns 0 on success or the error
+ * from dccp_listen_start().
+ */
+static int inet_dccp_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = -EINVAL;
+
+	lock_sock(sk);
+
+	if (sock->state == SS_UNCONNECTED && sock->type == SOCK_DCCP) {
+		const unsigned char old_state = sk->sk_state;
+
+		if ((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)) {
+			/*
+			 * Really, if the socket is already in listen state
+			 * we can only allow the backlog to be adjusted.
+			 *
+			 * FIXME: here it probably should be
+			 * sk->sk_prot->listen_start, see tcp_listen_start
+			 */
+			if (old_state == DCCP_LISTEN)
+				err = 0;
+			else
+				err = dccp_listen_start(sk);
+
+			if (!err)
+				sk->sk_max_ack_backlog = backlog;
+		}
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+/*
+ * Close transition table, indexed by the current sk_state. Each entry holds
+ * the state to move to, optionally ORed with DCCP_ACTION_FIN;
+ * dccp_close_state() splits the two with DCCP_STATE_MASK.
+ */
+static const unsigned char dccp_new_state[] = {
+ /* current state: new state: action: */
+ [0] = DCCP_CLOSED,
+ [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
+ [DCCP_REQUESTING] = DCCP_CLOSED,
+ [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
+ [DCCP_LISTEN] = DCCP_CLOSED,
+ [DCCP_RESPOND] = DCCP_CLOSED,
+ [DCCP_CLOSING] = DCCP_CLOSED,
+ [DCCP_TIME_WAIT] = DCCP_CLOSED,
+ [DCCP_CLOSED] = DCCP_CLOSED,
+};
+
+/*
+ * dccp_close_state - move @sk to the state that follows a close
+ *
+ * Looks the current state up in dccp_new_state[] and switches to the state
+ * found there if it differs. Returns non-zero when the table entry carries
+ * DCCP_ACTION_FIN.
+ */
+static int dccp_close_state(struct sock *sk)
+{
+	const int entry = dccp_new_state[sk->sk_state];
+	const int target = entry & DCCP_STATE_MASK;
+
+	if (sk->sk_state != target)
+		dccp_set_state(sk, target);
+
+	return entry & DCCP_ACTION_FIN;
+}
+
+/*
+ * dccp_close - close() handler
+ * @sk: socket being closed
+ * @timeout: how long sk_stream_wait_close() may wait
+ *
+ * Shuts the socket down in both directions, discards unread data and either
+ * disconnects at once (SO_LINGER with zero time) or sends CLOSE/CLOSEREQ
+ * when dccp_close_state() asks for it. The socket is then orphaned; it is
+ * destroyed here if it already reached DCCP_CLOSED.
+ */
+void dccp_close(struct sock *sk, long timeout)
+{
+ struct sk_buff *skb;
+
+ lock_sock(sk);
+
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ if (sk->sk_state == DCCP_LISTEN) {
+ dccp_set_state(sk, DCCP_CLOSED);
+
+ /* Special case. */
+ inet_csk_listen_stop(sk);
+
+ goto adjudge_to_death;
+ }
+
+ /*
+ * We need to flush the recv. buffs. We do this only on the
+ * descriptor close, not protocol-sourced closes, because the
+ * reader process may not have drained the data yet!
+ */
+ /* FIXME: check for unread data */
+ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ __kfree_skb(skb);
+ }
+
+ if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+ /* Check zero linger _after_ checking for unread data. */
+ sk->sk_prot->disconnect(sk, 0);
+ } else if (dccp_close_state(sk)) {
+ dccp_send_close(sk, 1);
+ }
+
+ sk_stream_wait_close(sk, timeout);
+
+adjudge_to_death:
+ /*
+ * It is the last release_sock in its life. It will remove backlog.
+ */
+ release_sock(sk);
+ /*
+ * Now socket is owned by kernel and we acquire BH lock
+ * to finish close. No need to check for user refs.
+ */
+ local_bh_disable();
+ bh_lock_sock(sk);
+ BUG_TRAP(!sock_owned_by_user(sk));
+
+ /* Hold a reference across the teardown; dropped by sock_put() below */
+ sock_hold(sk);
+ sock_orphan(sk);
+
+ /*
+ * The last release_sock may have processed the CLOSE or RESET
+ * packet moving sock to CLOSED state, if not we have to fire
+ * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
+ * in draft-ietf-dccp-spec-11. -acme
+ */
+ if (sk->sk_state == DCCP_CLOSING) {
+ /* FIXME: should start at 2 * RTT */
+ /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ inet_csk(sk)->icsk_rto,
+ DCCP_RTO_MAX);
+#if 0
+ /* Yeah, we should use sk->sk_prot->orphan_count, etc */
+ dccp_set_state(sk, DCCP_CLOSED);
+#endif
+ }
+
+ atomic_inc(sk->sk_prot->orphan_count);
+ if (sk->sk_state == DCCP_CLOSED)
+ inet_csk_destroy_sock(sk);
+
+ /* Otherwise, socket is reprieved until protocol close. */
+
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ sock_put(sk);
+}
+
+/*
+ * dccp_shutdown - protocol shutdown hook
+ *
+ * Not implemented: only emits a debug message, @how is ignored.
+ */
+void dccp_shutdown(struct sock *sk, int how)
+{
+ dccp_pr_debug("entry\n");
+}
+
+/*
+ * Socket level operations for PF_INET DCCP sockets. Only poll and listen
+ * are DCCP specific; the rest are the generic inet_*, sock_common_* and
+ * sock_no_* handlers.
+ */
+static struct proto_ops inet_dccp_ops = {
+ .family = PF_INET,
+ .owner = THIS_MODULE,
+ .release = inet_release,
+ .bind = inet_bind,
+ .connect = inet_stream_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = inet_accept,
+ .getname = inet_getname,
+ /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
+ .poll = dccp_poll,
+ .ioctl = inet_ioctl,
+ /* FIXME: work on inet_listen to rename it to sock_common_listen */
+ .listen = inet_dccp_listen,
+ .shutdown = inet_shutdown,
+ .setsockopt = sock_common_setsockopt,
+ .getsockopt = sock_common_getsockopt,
+ .sendmsg = inet_sendmsg,
+ .recvmsg = sock_common_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+extern struct net_proto_family inet_family_ops;
+
+/*
+ * Ties SOCK_DCCP/IPPROTO_DCCP to dccp_v4_prot and inet_dccp_ops; registered
+ * with inet_register_protosw() in dccp_init().
+ */
+static struct inet_protosw dccp_v4_protosw = {
+ .type = SOCK_DCCP,
+ .protocol = IPPROTO_DCCP,
+ .prot = &dccp_v4_prot,
+ .ops = &inet_dccp_ops,
+ .capability = -1,
+ .no_check = 0,
+ .flags = 0,
+};
+
+/*
+ * This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets. A control sock will be created
+ * for this socket at the initialization time.
+ */
+struct socket *dccp_ctl_socket;
+
+/* Printed by dccp_ctl_sock_init() when the control socket cannot be created */
+static char dccp_ctl_socket_err_msg[] __initdata =
+ KERN_ERR "DCCP: Failed to create the control socket.\n";
+
+/*
+ * dccp_ctl_sock_init - create the kernel control socket
+ *
+ * On success the socket allocates with GFP_ATOMIC, has uc_ttl set to -1 and
+ * is removed from the hash tables. Returns the result of
+ * sock_create_kern(); a negative value is also logged.
+ */
+static int __init dccp_ctl_sock_init(void)
+{
+	struct sock *ctl_sk;
+	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
+				  &dccp_ctl_socket);
+
+	if (rc < 0) {
+		printk(dccp_ctl_socket_err_msg);
+		return rc;
+	}
+
+	ctl_sk = dccp_ctl_socket->sk;
+	ctl_sk->sk_allocation = GFP_ATOMIC;
+	inet_sk(ctl_sk)->uc_ttl = -1;
+
+	/* Unhash it so that IP input processing does not even
+	 * see it, we do not wish this socket to see incoming
+	 * packets.
+	 */
+	ctl_sk->sk_prot->unhash(ctl_sk);
+
+	return rc;
+}
+
+#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
+/*
+ * dccp_ctl_sock_exit - release the control socket, if there is one
+ *
+ * The global pointer is cleared afterwards, so calling this twice is
+ * harmless.
+ */
+void dccp_ctl_sock_exit(void)
+{
+	if (dccp_ctl_socket == NULL)
+		return;
+
+	sock_release(dccp_ctl_socket);
+	dccp_ctl_socket = NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
+#endif
+
+/*
+ * init_dccp_v4_mibs - allocate the two per-CPU DCCP MIB areas
+ *
+ * Returns 0 on success or -ENOMEM. On failure neither area stays
+ * allocated: dccp_statistics[0] is freed and reset to NULL when the second
+ * allocation fails.
+ */
+static int __init init_dccp_v4_mibs(void)
+{
+	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
+	if (dccp_statistics[0] == NULL)
+		return -ENOMEM;
+
+	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
+	if (dccp_statistics[1] == NULL) {
+		/* Undo the first allocation */
+		free_percpu(dccp_statistics[0]);
+		dccp_statistics[0] = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Read-only (0444) module parameter. When non-zero, dccp_init() sizes the
+ * established hash table from it instead of from the amount of memory.
+ */
+static int thash_entries;
+module_param(thash_entries, int, 0444);
+MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+/* Read-only (0444) module parameter, only built with CONFIG_IP_DCCP_DEBUG */
+int dccp_debug;
+module_param(dccp_debug, int, 0444);
+MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
+#endif
+
+/*
+ * dccp_init - module initialisation
+ *
+ * Registers the protocol, creates the bind bucket cache, allocates and
+ * initialises the established and bind hash tables, allocates the MIBs,
+ * hooks DCCP into the IPv4 stack and creates the control socket.
+ *
+ * Returns 0 on success. On any failure everything set up so far is undone
+ * and a negative error is returned: the value from proto_register(),
+ * -ENOBUFS for the cache and hash tables, the value from
+ * init_dccp_v4_mibs(), -EAGAIN if the protocol number is taken, or the
+ * value from dccp_ctl_sock_init().
+ */
+static int __init dccp_init(void)
+{
+	unsigned long goal;
+	int ehash_order, bhash_order, i;
+	int rc = proto_register(&dccp_v4_prot, 1);
+
+	if (rc)
+		goto out;
+
+	/*
+	 * rc is 0 at this point. It has to carry an error before each
+	 * "goto out_*" below, otherwise a failed allocation would be
+	 * unwound and then reported as a successful module load.
+	 */
+	rc = -ENOBUFS;
+	dccp_hashinfo.bind_bucket_cachep =
+		kmem_cache_create("dccp_bind_bucket",
+				  sizeof(struct inet_bind_bucket), 0,
+				  SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!dccp_hashinfo.bind_bucket_cachep)
+		goto out_proto_unregister;
+
+	/*
+	 * Size and allocate the main established and bind bucket
+	 * hash tables.
+	 *
+	 * The methodology is similar to that of the buffer cache.
+	 */
+	if (num_physpages >= (128 * 1024))
+		goal = num_physpages >> (21 - PAGE_SHIFT);
+	else
+		goal = num_physpages >> (23 - PAGE_SHIFT);
+
+	if (thash_entries)
+		goal = (thash_entries *
+			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
+	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
+		;
+	do {
+		/*
+		 * ehash_size is half the number of buckets that fit into
+		 * the allocation, rounded down to a power of two; the
+		 * table itself holds twice that many buckets.
+		 */
+		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
+					sizeof(struct inet_ehash_bucket);
+		dccp_hashinfo.ehash_size >>= 1;
+		while (dccp_hashinfo.ehash_size &
+		       (dccp_hashinfo.ehash_size - 1))
+			dccp_hashinfo.ehash_size--;
+		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
+			__get_free_pages(GFP_ATOMIC, ehash_order);
+	} while (!dccp_hashinfo.ehash && --ehash_order > 0);
+
+	if (!dccp_hashinfo.ehash) {
+		printk(KERN_CRIT "Failed to allocate DCCP "
+				 "established hash table\n");
+		goto out_free_bind_bucket_cachep;
+	}
+
+	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
+		rwlock_init(&dccp_hashinfo.ehash[i].lock);
+		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
+	}
+
+	bhash_order = ehash_order;
+
+	do {
+		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
+					sizeof(struct inet_bind_hashbucket);
+		/* Too many buckets: retry with the next smaller order */
+		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
+		    bhash_order > 0)
+			continue;
+		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
+			__get_free_pages(GFP_ATOMIC, bhash_order);
+	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
+
+	if (!dccp_hashinfo.bhash) {
+		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
+		goto out_free_dccp_ehash;
+	}
+
+	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
+		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
+		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+	}
+
+	rc = init_dccp_v4_mibs();
+	if (rc)
+		goto out_free_dccp_bhash;
+
+	rc = -EAGAIN;
+	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
+		goto out_free_dccp_v4_mibs;
+
+	inet_register_protosw(&dccp_v4_protosw);
+
+	rc = dccp_ctl_sock_init();
+	if (rc)
+		goto out_unregister_protosw;
+out:
+	return rc;
+out_unregister_protosw:
+	inet_unregister_protosw(&dccp_v4_protosw);
+	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
+out_free_dccp_v4_mibs:
+	free_percpu(dccp_statistics[0]);
+	free_percpu(dccp_statistics[1]);
+	dccp_statistics[0] = dccp_statistics[1] = NULL;
+out_free_dccp_bhash:
+	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+	dccp_hashinfo.bhash = NULL;
+out_free_dccp_ehash:
+	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+	dccp_hashinfo.ehash = NULL;
+out_free_bind_bucket_cachep:
+	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+	dccp_hashinfo.bind_bucket_cachep = NULL;
+out_proto_unregister:
+	proto_unregister(&dccp_v4_prot);
+	goto out;
+}
+
+/* Printed by dccp_fini() when the IPv4 protocol handler cannot be removed */
+static const char dccp_del_proto_err_msg[] __exitdata =
+	KERN_ERR "can't remove dccp net_protocol\n";
+
+/*
+ * dccp_fini - module exit
+ *
+ * Undoes dccp_init(): unregisters the protosw and the protocol handler,
+ * frees the MIBs and both hash tables, destroys the bind bucket cache and
+ * unregisters the protocol.
+ */
+static void __exit dccp_fini(void)
+{
+	inet_unregister_protosw(&dccp_v4_protosw);
+
+	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
+		printk(dccp_del_proto_err_msg);
+
+	free_percpu(dccp_statistics[0]);
+	free_percpu(dccp_statistics[1]);
+	free_pages((unsigned long)dccp_hashinfo.bhash,
+		   get_order(dccp_hashinfo.bhash_size *
+			     sizeof(struct inet_bind_hashbucket)));
+	/*
+	 * dccp_init() halves ehash_size after sizing the allocation, and
+	 * the table holds ehash_size << 1 buckets. The order has to be
+	 * computed from that doubled count, otherwise it comes out one
+	 * too small and only half of the table is freed.
+	 */
+	free_pages((unsigned long)dccp_hashinfo.ehash,
+		   get_order((dccp_hashinfo.ehash_size << 1) *
+			     sizeof(struct inet_ehash_bucket)));
+	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+	proto_unregister(&dccp_v4_prot);
+}
+
+module_init(dccp_init);
+module_exit(dccp_fini);
+
+/*
+ * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP
+ * (33) values directly. Also cover the case where the protocol is not
+ * specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
+MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644
index 00000000000..aa34b576e22
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,255 @@
+/*
+ * net/dccp/timer.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include "dccp.h"
+
+/*
+ * Timer handlers, defined further down in this file and handed to
+ * inet_csk_init_xmit_timers() by dccp_init_xmit_timers().
+ */
+static void dccp_write_timer(unsigned long data);
+static void dccp_keepalive_timer(unsigned long data);
+static void dccp_delack_timer(unsigned long data);
+
+/*
+ * Hook the DCCP timer handlers of this file into the connection sock
+ * timers of @sk.
+ */
+void dccp_init_xmit_timers(struct sock *sk)
+{
+	inet_csk_init_xmit_timers(sk,
+				  dccp_write_timer,
+				  dccp_delack_timer,
+				  dccp_keepalive_timer);
+}
+
+/*
+ * Abort the connection after a write timeout: record and report the error
+ * on @sk, send a RESET with code DCCP_RESET_CODE_ABORTED, finish the socket
+ * and account the event in DCCP_MIB_ABORTONTIMEOUT.
+ */
+static void dccp_write_err(struct sock *sk)
+{
+	int err = sk->sk_err_soft;
+
+	/* Fall back to ETIMEDOUT when no soft error has been recorded. */
+	if (err == 0)
+		err = ETIMEDOUT;
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+
+	dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+	dccp_done(sk);
+	DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
+}
+
+/*
+ * A write timeout has occurred. Process the after effects.
+ *
+ * Picks the retransmission limit that applies to the current socket state
+ * and compares it with the number of retransmissions already made.
+ *
+ * Returns 1 when the limit has been reached, in which case the connection
+ * has been aborted through dccp_write_err(); returns 0 otherwise.
+ */
+static int dccp_write_timeout(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	const int handshaking = sk->sk_state == DCCP_REQUESTING ||
+				sk->sk_state == DCCP_PARTOPEN;
+	int retry_until;
+
+	if (handshaking) {
+		if (icsk->icsk_retransmits != 0)
+			dst_negative_advice(&sk->sk_dst_cache);
+
+		/* A per-socket setting wins over the built-in default. */
+		retry_until = icsk->icsk_syn_retries;
+		if (retry_until == 0)
+			retry_until = 3; /* FIXME! sysctl_tcp_syn_retries */
+	} else {
+		/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
+		   black hole detection. :-(
+
+		   It is place to make it. It is not made. I do not want
+		   to make it. It is disgusting. It does not work in any
+		   case. Let me to cite the same draft, which requires for
+		   us to implement this:
+
+   "The one security concern raised by this memo is that ICMP black holes
+   are often caused by over-zealous security administrators who block
+   all ICMP messages. It is vitally important that those who design and
+   deploy security systems understand the impact of strict filtering on
+   upper-layer protocols. The safest web site in the world is worthless
+   if most TCP implementations cannot transfer data from it. It would
+   be far nicer to have all of the black holes fixed rather than fixing
+   all of the TCP implementations."
+
+		   Golden words :-).
+		 */
+		if (icsk->icsk_retransmits >=
+		    5 /* FIXME! sysctl_tcp_retries1 */)
+			dst_negative_advice(&sk->sk_dst_cache);
+
+		/*
+		 * FIXME: see tcp_write_timeout and tcp_out_of_resources
+		 */
+		retry_until = 15; /* FIXME! sysctl_tcp_retries2 */
+	}
+
+	if (icsk->icsk_retransmits < retry_until)
+		return 0;
+
+	/* Has it gone just too far? */
+	dccp_write_err(sk);
+	return 1;
+}
+
+/*
+ * Delayed ACK timer handler; @data is the struct sock pointer.
+ *
+ * This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
+ * Every path leaves through the common exit, which unlocks the socket and
+ * drops a reference with sock_put().
+ */
+static void dccp_delack_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	bh_lock_sock(sk);
+
+	if (sock_owned_by_user(sk)) {
+		/* Socket is owned by the user: flag it and try again later. */
+		icsk->icsk_ack.blocked = 1;
+		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+		sk_reset_timer(sk, &icsk->icsk_delack_timer,
+			       jiffies + TCP_DELACK_MIN);
+		goto unlock;
+	}
+
+	/* Nothing to do on a closed socket or without a pending ACK timer. */
+	if (sk->sk_state == DCCP_CLOSED)
+		goto unlock;
+	if ((icsk->icsk_ack.pending & ICSK_ACK_TIMER) == 0)
+		goto unlock;
+
+	/* The recorded deadline is still ahead: re-arm for it. */
+	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+		sk_reset_timer(sk, &icsk->icsk_delack_timer,
+			       icsk->icsk_ack.timeout);
+		goto unlock;
+	}
+
+	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+	if (!inet_csk_ack_scheduled(sk))
+		goto unlock;
+
+	if (icsk->icsk_ack.pingpong) {
+		/*
+		 * Delayed ACK missed: leave pingpong mode and
+		 * deflate ATO.
+		 */
+		icsk->icsk_ack.pingpong = 0;
+		icsk->icsk_ack.ato = TCP_ATO_MIN;
+	} else {
+		/* Delayed ACK missed: inflate ATO, capped at the RTO. */
+		icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
+					 icsk->icsk_rto);
+	}
+
+	dccp_send_ack(sk);
+	NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+unlock:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ * The DCCP retransmit timer.
+ *
+ * Called from dccp_write_timer() for ICSK_TIME_RETRANS events. Gives up if
+ * dccp_write_timeout() reports that the socket was aborted; otherwise
+ * retransmits sk->sk_send_head and re-arms the timer.
+ */
+static void dccp_retransmit_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	/*
+	 * sk->sk_send_head has to have one skb with
+	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
+	 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
+	 * (PARTOPEN timer), etc).
+	 */
+	BUG_TRAP(sk->sk_send_head != NULL);
+
+	/*
+	 * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
+	 * sent, no need to retransmit, this sock is dead.
+	 */
+	if (dccp_write_timeout(sk))
+		return;
+
+	/*
+	 * We want to know the number of packets retransmitted, not the
+	 * total number of retransmissions of clones of original packets.
+	 */
+	if (icsk->icsk_retransmits == 0)
+		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
+
+	if (dccp_retransmit_skb(sk, sk->sk_send_head) >= 0) {
+		/* Retransmitted: back off and double the RTO up to the cap. */
+		icsk->icsk_backoff++;
+		icsk->icsk_retransmits++;
+
+		icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  icsk->icsk_rto, DCCP_RTO_MAX);
+		if (icsk->icsk_retransmits >
+		    3 /* FIXME: sysctl_dccp_retries1 */)
+			__sk_dst_reset(sk);
+		return;
+	}
+
+	/*
+	 * Retransmission failed because of local congestion,
+	 * do not backoff.
+	 */
+	if (icsk->icsk_retransmits == 0)
+		icsk->icsk_retransmits = 1;
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				  min(icsk->icsk_rto,
+				      TCP_RESOURCE_PROBE_INTERVAL),
+				  DCCP_RTO_MAX);
+}
+
+/*
+ * Retransmit timer handler; @data is the struct sock pointer.
+ *
+ * Consumes the pending event recorded in icsk_pending and dispatches
+ * ICSK_TIME_RETRANS to dccp_retransmit_timer(). Every path unlocks the
+ * socket and drops a reference with sock_put() before returning.
+ */
+static void dccp_write_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int pending;
+
+	bh_lock_sock(sk);
+
+	if (sock_owned_by_user(sk)) {
+		/* Socket is owned by the user: try again later. */
+		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+			       jiffies + (HZ / 20));
+		goto unlock;
+	}
+
+	/* Closed socket or no event pending: nothing to do. */
+	if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
+		goto unlock;
+
+	/* The recorded deadline is still ahead: re-arm for it. */
+	if (time_after(icsk->icsk_timeout, jiffies)) {
+		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+			       icsk->icsk_timeout);
+		goto unlock;
+	}
+
+	pending = icsk->icsk_pending;
+	icsk->icsk_pending = 0;
+
+	if (pending == ICSK_TIME_RETRANS)
+		dccp_retransmit_timer(sk);
+unlock:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ * Timer for listening sockets
+ *
+ * Called from dccp_keepalive_timer() when @sk is in DCCP_LISTEN state;
+ * hands the socket's request queue to inet_csk_reqsk_queue_prune() with
+ * the TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT and DCCP_RTO_MAX parameters.
+ */
+static void dccp_response_timer(struct sock *sk)
+{
+ inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
+ DCCP_RTO_MAX);
+}
+
+/*
+ * Keepalive timer handler; @data is the struct sock pointer.
+ *
+ * Only listening sockets are serviced here, by running the response timer.
+ * The socket is unlocked and a reference dropped with sock_put() on every
+ * path.
+ */
+static void dccp_keepalive_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	bh_lock_sock(sk);
+
+	/* Only process if socket is not in use; otherwise try again later. */
+	if (sock_owned_by_user(sk))
+		inet_csk_reset_keepalive_timer(sk, HZ / 20);
+	else if (sk->sk_state == DCCP_LISTEN)
+		dccp_response_timer(sk);
+
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}