summary refs log tree commit diff stats
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r-- net/dccp/Kconfig 4
-rw-r--r-- net/dccp/Makefile 15
-rw-r--r-- net/dccp/ackvec.c 9
-rw-r--r-- net/dccp/ackvec.h 54
-rw-r--r-- net/dccp/ccid.c 214
-rw-r--r-- net/dccp/ccid.h 40
-rw-r--r-- net/dccp/ccids/Kconfig 79
-rw-r--r-- net/dccp/ccids/Makefile 9
-rw-r--r-- net/dccp/ccids/ccid2.c 28
-rw-r--r-- net/dccp/ccids/ccid3.c 23
-rw-r--r-- net/dccp/ccids/lib/Makefile 3
-rw-r--r-- net/dccp/ccids/lib/loss_interval.c 3
-rw-r--r-- net/dccp/ccids/lib/packet_history.c 9
-rw-r--r-- net/dccp/ccids/lib/tfrc.c 19
-rw-r--r-- net/dccp/ccids/lib/tfrc.h 11
-rw-r--r-- net/dccp/ccids/lib/tfrc_equation.c 4
-rw-r--r-- net/dccp/dccp.h 19
-rw-r--r-- net/dccp/diag.c 11
-rw-r--r-- net/dccp/feat.c 1462
-rw-r--r-- net/dccp/feat.h 130
-rw-r--r-- net/dccp/input.c 46
-rw-r--r-- net/dccp/ipv4.c 13
-rw-r--r-- net/dccp/ipv6.c 15
-rw-r--r-- net/dccp/minisocks.c 54
-rw-r--r-- net/dccp/options.c 229
-rw-r--r-- net/dccp/output.c 19
-rw-r--r-- net/dccp/probe.c 19
-rw-r--r-- net/dccp/proto.c 232
-rw-r--r-- net/dccp/sysctl.c 21
-rw-r--r-- net/dccp/timer.c 12
30 files changed, 1641 insertions, 1165 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 7aa2a7acc7e..ad6dffd9070 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,7 +1,6 @@
menuconfig IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
- select IP_DCCP_CCID2
---help---
Datagram Congestion Control Protocol (RFC 4340)
@@ -25,9 +24,6 @@ config INET_DCCP_DIAG
def_tristate y if (IP_DCCP = y && INET_DIAG = y)
def_tristate m
-config IP_DCCP_ACKVEC
- bool
-
source "net/dccp/ccids/Kconfig"
menu "DCCP Kernel Hacking"
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index f4f8793aaff..2991efcc8de 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -2,14 +2,23 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
+#
+# CCID algorithms to be used by dccp.ko
+#
+# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency
+dccp-y += ccids/ccid2.o ackvec.o
+dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o
+dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \
+ ccids/lib/tfrc_equation.o \
+ ccids/lib/packet_history.o \
+ ccids/lib/loss_interval.o
+
dccp_ipv4-y := ipv4.o
# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
dccp_ipv6-y := ipv6.o
-dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
-
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
@@ -17,5 +26,3 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o
dccp_diag-y := diag.o
dccp_probe-y := probe.o
-
-obj-y += ccids/
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 1e8be246ad1..01e4d39fa23 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -12,7 +12,6 @@
#include "ackvec.h"
#include "dccp.h"
-#include <linux/dccp.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
/* Figure out how many options do we need to represent the ackvec */
- const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+ const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
u16 len = av->av_vec_len + 2 * nr_opts, i;
u32 elapsed_time;
const unsigned char *tail, *from;
@@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
for (i = 0; i < nr_opts; ++i) {
int copylen = len;
- if (len > DCCP_MAX_ACKVEC_OPT_LEN)
- copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+ if (len > DCCP_SINGLE_OPT_MAXLEN)
+ copylen = DCCP_SINGLE_OPT_MAXLEN;
*to++ = DCCPO_ACK_VECTOR_0;
*to++ = copylen + 2;
@@ -432,7 +431,7 @@ found:
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
u64 *ackno, const u8 opt, const u8 *value, const u8 len)
{
- if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+ if (len > DCCP_SINGLE_OPT_MAXLEN)
return -1;
/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index bcb64fb4ace..45f95e55f87 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -11,15 +11,14 @@
* published by the Free Software Foundation.
*/
+#include <linux/dccp.h>
#include <linux/compiler.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/types.h>
-/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_OPT_LEN 253
/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
+#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
#define DCCP_ACKVEC_STATE_RECEIVED 0
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
@@ -85,7 +84,6 @@ struct dccp_ackvec_record {
struct sock;
struct sk_buff;
-#ifdef CONFIG_IP_DCCP_ACKVEC
extern int dccp_ackvec_init(void);
extern void dccp_ackvec_exit(void);
@@ -107,52 +105,4 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
return av->av_vec_len;
}
-#else /* CONFIG_IP_DCCP_ACKVEC */
-static inline int dccp_ackvec_init(void)
-{
- return 0;
-}
-
-static inline void dccp_ackvec_exit(void)
-{
-}
-
-static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
- return NULL;
-}
-
-static inline void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-}
-
-static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
- const u64 ackno, const u8 state)
-{
- return -1;
-}
-
-static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
- struct sock *sk, const u64 ackno)
-{
-}
-
-static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- const u64 *ackno, const u8 opt,
- const u8 *value, const u8 len)
-{
- return -1;
-}
-
-static inline int dccp_insert_option_ackvec(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return -1;
-}
-
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
-{
- return 0;
-}
-#endif /* CONFIG_IP_DCCP_ACKVEC */
#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 8fe931a3d7a..f3e9ba1cfd0 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -12,49 +12,70 @@
*/
#include "ccid.h"
+#include "ccids/lib/tfrc.h"
-static struct ccid_operations *ccids[CCID_MAX];
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t ccids_lockct = ATOMIC_INIT(0);
-static DEFINE_SPINLOCK(ccids_lock);
+static struct ccid_operations *ccids[] = {
+ &ccid2_ops,
+#ifdef CONFIG_IP_DCCP_CCID3
+ &ccid3_ops,
+#endif
+};
-/*
- * The strategy is: modifications ccids vector are short, do not sleep and
- * veeery rare, but read access should be free of any exclusive locks.
- */
-static void ccids_write_lock(void)
+static struct ccid_operations *ccid_by_number(const u8 id)
{
- spin_lock(&ccids_lock);
- while (atomic_read(&ccids_lockct) != 0) {
- spin_unlock(&ccids_lock);
- yield();
- spin_lock(&ccids_lock);
- }
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ccids); i++)
+ if (ccids[i]->ccid_id == id)
+ return ccids[i];
+ return NULL;
}
-static inline void ccids_write_unlock(void)
+/* check that up to @array_len members in @ccid_array are supported */
+bool ccid_support_check(u8 const *ccid_array, u8 array_len)
{
- spin_unlock(&ccids_lock);
+ while (array_len > 0)
+ if (ccid_by_number(ccid_array[--array_len]) == NULL)
+ return false;
+ return true;
}
-static inline void ccids_read_lock(void)
+/**
+ * ccid_get_builtin_ccids - Populate a list of built-in CCIDs
+ * @ccid_array: pointer to copy into
+ * @array_len: value to return length into
+ * This function allocates memory - caller must see that it is freed after use.
+ */
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
{
- atomic_inc(&ccids_lockct);
- smp_mb__after_atomic_inc();
- spin_unlock_wait(&ccids_lock);
+ *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any());
+ if (*ccid_array == NULL)
+ return -ENOBUFS;
+
+ for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1)
+ (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id;
+ return 0;
}
-static inline void ccids_read_unlock(void)
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *optval, int __user *optlen)
{
- atomic_dec(&ccids_lockct);
-}
+ u8 *ccid_array, array_len;
+ int err = 0;
-#else
-#define ccids_write_lock() do { } while(0)
-#define ccids_write_unlock() do { } while(0)
-#define ccids_read_lock() do { } while(0)
-#define ccids_read_unlock() do { } while(0)
-#endif
+ if (len < ARRAY_SIZE(ccids))
+ return -EINVAL;
+
+ if (ccid_get_builtin_ccids(&ccid_array, &array_len))
+ return -ENOBUFS;
+
+ if (put_user(array_len, optlen) ||
+ copy_to_user(optval, ccid_array, array_len))
+ err = -EFAULT;
+
+ kfree(ccid_array);
+ return err;
+}
static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
{
@@ -86,7 +107,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
}
}
-int ccid_register(struct ccid_operations *ccid_ops)
+static int ccid_activate(struct ccid_operations *ccid_ops)
{
int err = -ENOBUFS;
@@ -104,79 +125,40 @@ int ccid_register(struct ccid_operations *ccid_ops)
if (ccid_ops->ccid_hc_tx_slab == NULL)
goto out_free_rx_slab;
- ccids_write_lock();
- err = -EEXIST;
- if (ccids[ccid_ops->ccid_id] == NULL) {
- ccids[ccid_ops->ccid_id] = ccid_ops;
- err = 0;
- }
- ccids_write_unlock();
- if (err != 0)
- goto out_free_tx_slab;
-
- pr_info("CCID: Registered CCID %d (%s)\n",
+ pr_info("CCID: Activated CCID %d (%s)\n",
ccid_ops->ccid_id, ccid_ops->ccid_name);
+ err = 0;
out:
return err;
-out_free_tx_slab:
- ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
- ccid_ops->ccid_hc_tx_slab = NULL;
- goto out;
out_free_rx_slab:
ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
ccid_ops->ccid_hc_rx_slab = NULL;
goto out;
}
-EXPORT_SYMBOL_GPL(ccid_register);
-
-int ccid_unregister(struct ccid_operations *ccid_ops)
+static void ccid_deactivate(struct ccid_operations *ccid_ops)
{
- ccids_write_lock();
- ccids[ccid_ops->ccid_id] = NULL;
- ccids_write_unlock();
-
ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
ccid_ops->ccid_hc_tx_slab = NULL;
ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
ccid_ops->ccid_hc_rx_slab = NULL;
- pr_info("CCID: Unregistered CCID %d (%s)\n",
+ pr_info("CCID: Deactivated CCID %d (%s)\n",
ccid_ops->ccid_id, ccid_ops->ccid_name);
- return 0;
}
-EXPORT_SYMBOL_GPL(ccid_unregister);
-
-struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
+struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx)
{
- struct ccid_operations *ccid_ops;
+ struct ccid_operations *ccid_ops = ccid_by_number(id);
struct ccid *ccid = NULL;
- ccids_read_lock();
-#ifdef CONFIG_MODULES
- if (ccids[id] == NULL) {
- /* We only try to load if in process context */
- ccids_read_unlock();
- if (gfp & GFP_ATOMIC)
- goto out;
- request_module("net-dccp-ccid-%d", id);
- ccids_read_lock();
- }
-#endif
- ccid_ops = ccids[id];
if (ccid_ops == NULL)
- goto out_unlock;
-
- if (!try_module_get(ccid_ops->ccid_owner))
- goto out_unlock;
-
- ccids_read_unlock();
+ goto out;
ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
- ccid_ops->ccid_hc_tx_slab, gfp);
+ ccid_ops->ccid_hc_tx_slab, gfp_any());
if (ccid == NULL)
- goto out_module_put;
+ goto out;
ccid->ccid_ops = ccid_ops;
if (rx) {
memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
@@ -191,67 +173,57 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
}
out:
return ccid;
-out_unlock:
- ccids_read_unlock();
- goto out;
out_free_ccid:
kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
ccid_ops->ccid_hc_tx_slab, ccid);
ccid = NULL;
-out_module_put:
- module_put(ccid_ops->ccid_owner);
goto out;
}
-EXPORT_SYMBOL_GPL(ccid_new);
-
-struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
+void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
{
- return ccid_new(id, sk, 1, gfp);
+ if (ccid != NULL) {
+ if (ccid->ccid_ops->ccid_hc_rx_exit != NULL)
+ ccid->ccid_ops->ccid_hc_rx_exit(sk);
+ kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid);
+ }
}
-EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
-
-struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp)
+void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
{
- return ccid_new(id, sk, 0, gfp);
+ if (ccid != NULL) {
+ if (ccid->ccid_ops->ccid_hc_tx_exit != NULL)
+ ccid->ccid_ops->ccid_hc_tx_exit(sk);
+ kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid);
+ }
}
-EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
-
-static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
+int __init ccid_initialize_builtins(void)
{
- struct ccid_operations *ccid_ops;
+ int i, err = tfrc_lib_init();
- if (ccid == NULL)
- return;
+ if (err)
+ return err;
- ccid_ops = ccid->ccid_ops;
- if (rx) {
- if (ccid_ops->ccid_hc_rx_exit != NULL)
- ccid_ops->ccid_hc_rx_exit(sk);
- kmem_cache_free(ccid_ops->ccid_hc_rx_slab, ccid);
- } else {
- if (ccid_ops->ccid_hc_tx_exit != NULL)
- ccid_ops->ccid_hc_tx_exit(sk);
- kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid);
+ for (i = 0; i < ARRAY_SIZE(ccids); i++) {
+ err = ccid_activate(ccids[i]);
+ if (err)
+ goto unwind_registrations;
}
- ccids_read_lock();
- if (ccids[ccid_ops->ccid_id] != NULL)
- module_put(ccid_ops->ccid_owner);
- ccids_read_unlock();
-}
+ return 0;
-void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
-{
- ccid_delete(ccid, sk, 1);
+unwind_registrations:
+ while(--i >= 0)
+ ccid_deactivate(ccids[i]);
+ tfrc_lib_exit();
+ return err;
}
-EXPORT_SYMBOL_GPL(ccid_hc_rx_delete);
-
-void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
+void ccid_cleanup_builtins(void)
{
- ccid_delete(ccid, sk, 0);
-}
+ int i;
-EXPORT_SYMBOL_GPL(ccid_hc_tx_delete);
+ for (i = 0; i < ARRAY_SIZE(ccids); i++)
+ ccid_deactivate(ccids[i]);
+ tfrc_lib_exit();
+}
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index fdeae7b5731..facedd20b53 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -29,7 +29,6 @@ struct tcp_info;
* @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
* @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
* @ccid_name: alphabetical identifier string for @ccid_id
- * @ccid_owner: module which implements/owns this CCID
* @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
* @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
*
@@ -48,7 +47,6 @@ struct ccid_operations {
unsigned char ccid_id;
__u32 ccid_ccmps;
const char *ccid_name;
- struct module *ccid_owner;
struct kmem_cache *ccid_hc_rx_slab,
*ccid_hc_tx_slab;
__u32 ccid_hc_rx_obj_size,
@@ -90,8 +88,13 @@ struct ccid_operations {
int __user *optlen);
};
-extern int ccid_register(struct ccid_operations *ccid_ops);
-extern int ccid_unregister(struct ccid_operations *ccid_ops);
+extern struct ccid_operations ccid2_ops;
+#ifdef CONFIG_IP_DCCP_CCID3
+extern struct ccid_operations ccid3_ops;
+#endif
+
+extern int ccid_initialize_builtins(void);
+extern void ccid_cleanup_builtins(void);
struct ccid {
struct ccid_operations *ccid_ops;
@@ -103,13 +106,30 @@ static inline void *ccid_priv(const struct ccid *ccid)
return (void *)ccid->ccid_priv;
}
-extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
- gfp_t gfp);
+extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *, int __user *);
+
+extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
-extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
- gfp_t gfp);
-extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
- gfp_t gfp);
+static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
+{
+ struct ccid *ccid = dp->dccps_hc_rx_ccid;
+
+ if (ccid == NULL || ccid->ccid_ops == NULL)
+ return -1;
+ return ccid->ccid_ops->ccid_id;
+}
+
+static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
+{
+ struct ccid *ccid = dp->dccps_hc_tx_ccid;
+
+ if (ccid == NULL || ccid->ccid_ops == NULL)
+ return -1;
+ return ccid->ccid_ops->ccid_id;
+}
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 12275943eab..b28bf962edc 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,80 +1,51 @@
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on EXPERIMENTAL
-config IP_DCCP_CCID2
- tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
- def_tristate IP_DCCP
- select IP_DCCP_ACKVEC
- ---help---
- CCID 2, TCP-like Congestion Control, denotes Additive Increase,
- Multiplicative Decrease (AIMD) congestion control with behavior
- modelled directly on TCP, including congestion window, slow start,
- timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum
- bandwidth over the long term, consistent with the use of end-to-end
- congestion control, but halves its congestion window in response to
- each congestion event. This leads to the abrupt rate changes
- typical of TCP. Applications should use CCID 2 if they prefer
- maximum bandwidth utilization to steadiness of rate. This is often
- the case for applications that are not playing their data directly
- to the user. For example, a hypothetical application that
- transferred files over DCCP, using application-level retransmissions
- for lost packets, would prefer CCID 2 to CCID 3. On-line games may
- also prefer CCID 2. See RFC 4341 for further details.
-
- CCID2 is the default CCID used by DCCP.
-
config IP_DCCP_CCID2_DEBUG
- bool "CCID2 debugging messages"
- depends on IP_DCCP_CCID2
- ---help---
- Enable CCID2-specific debugging messages.
+ bool "CCID-2 debugging messages"
+ ---help---
+ Enable CCID-2 specific debugging messages.
- When compiling CCID2 as a module, this debugging output can
- additionally be toggled by setting the ccid2_debug module
- parameter to 0 or 1.
+ The debugging output can additionally be toggled by setting the
+ ccid2_debug parameter to 0 or 1.
- If in doubt, say N.
+ If in doubt, say N.
config IP_DCCP_CCID3
- tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
- def_tristate IP_DCCP
- select IP_DCCP_TFRC_LIB
+ bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)"
+ def_bool y if (IP_DCCP = y || IP_DCCP = m)
---help---
- CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+ CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
where a flow is "reasonably fair" if its sending rate is generally
within a factor of two of the sending rate of a TCP flow under the
same conditions. However, TFRC has a much lower variation of
- throughput over time compared with TCP, which makes CCID 3 more
- suitable than CCID 2 for applications such streaming media where a
+ throughput over time compared with TCP, which makes CCID-3 more
+ suitable than CCID-2 for applications such as streaming media where a
relatively smooth sending rate is of importance.
- CCID 3 is further described in RFC 4342,
+ CCID-3 is further described in RFC 4342,
http://www.ietf.org/rfc/rfc4342.txt
The TFRC congestion control algorithms were initially described in
- RFC 3448.
+ RFC 5348.
This text was extracted from RFC 4340 (sec. 10.2),
http://www.ietf.org/rfc/rfc4340.txt
-
- To compile this CCID as a module, choose M here: the module will be
- called dccp_ccid3.
- If in doubt, say M.
+ If in doubt, say N.
config IP_DCCP_CCID3_DEBUG
- bool "CCID3 debugging messages"
- depends on IP_DCCP_CCID3
- ---help---
- Enable CCID3-specific debugging messages.
+ bool "CCID-3 debugging messages"
+ depends on IP_DCCP_CCID3
+ ---help---
+ Enable CCID-3 specific debugging messages.
- When compiling CCID3 as a module, this debugging output can
- additionally be toggled by setting the ccid3_debug module
- parameter to 0 or 1.
+ The debugging output can additionally be toggled by setting the
+ ccid3_debug parameter to 0 or 1.
- If in doubt, say N.
+ If in doubt, say N.
config IP_DCCP_CCID3_RTO
int "Use higher bound for nofeedback timer"
@@ -108,12 +79,8 @@ config IP_DCCP_CCID3_RTO
therefore not be performed on WANs.
config IP_DCCP_TFRC_LIB
- tristate
- default n
+ def_bool y if IP_DCCP_CCID3
config IP_DCCP_TFRC_DEBUG
- bool
- depends on IP_DCCP_TFRC_LIB
- default y if IP_DCCP_CCID3_DEBUG
-
+ def_bool y if IP_DCCP_CCID3_DEBUG
endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
deleted file mode 100644
index 438f20bccff..00000000000
--- a/net/dccp/ccids/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
-
-dccp_ccid3-y := ccid3.o
-
-obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o
-
-dccp_ccid2-y := ccid2.o
-
-obj-y += lib/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9a430734530..d235294ace2 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,7 +25,7 @@
/*
* This implementation should follow RFC 4341
*/
-
+#include "../feat.h"
#include "../ccid.h"
#include "../dccp.h"
#include "ccid2.h"
@@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
- if (val > 0xFFFF) /* RFC 4340, 11.3 */
- val = 0xFFFF;
+ if (val > DCCPF_ACK_RATIO_MAX)
+ val = DCCPF_ACK_RATIO_MAX;
if (val == dp->dccps_l_ack_ratio)
return;
@@ -768,10 +768,9 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
}
-static struct ccid_operations ccid2 = {
+struct ccid_operations ccid2_ops = {
.ccid_id = DCCPC_CCID2,
.ccid_name = "TCP-like",
- .ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
@@ -784,22 +783,5 @@ static struct ccid_operations ccid2 = {
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
module_param(ccid2_debug, bool, 0644);
-MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
+MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
#endif
-
-static __init int ccid2_module_init(void)
-{
- return ccid_register(&ccid2);
-}
-module_init(ccid2_module_init);
-
-static __exit void ccid2_module_exit(void)
-{
- ccid_unregister(&ccid2);
-}
-module_exit(ccid2_module_exit);
-
-MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
-MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("net-dccp-ccid-2");
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3b8bd7ca676..a27b7f4c19c 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -940,10 +940,9 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
return 0;
}
-static struct ccid_operations ccid3 = {
+struct ccid_operations ccid3_ops = {
.ccid_id = DCCPC_CCID3,
.ccid_name = "TCP-Friendly Rate Control",
- .ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
.ccid_hc_tx_init = ccid3_hc_tx_init,
.ccid_hc_tx_exit = ccid3_hc_tx_exit,
@@ -964,23 +963,5 @@ static struct ccid_operations ccid3 = {
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
module_param(ccid3_debug, bool, 0644);
-MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages");
#endif
-
-static __init int ccid3_module_init(void)
-{
- return ccid_register(&ccid3);
-}
-module_init(ccid3_module_init);
-
-static __exit void ccid3_module_exit(void)
-{
- ccid_unregister(&ccid3);
-}
-module_exit(ccid3_module_exit);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
- "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
-MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
deleted file mode 100644
index 68c93e3d89d..00000000000
--- a/net/dccp/ccids/lib/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
-
-dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5b3ce0688c5..4d1e4012726 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -60,7 +60,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
lh->ring[LIH_INDEX(lh->counter)] = NULL;
}
}
-EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
{
@@ -121,7 +120,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
return (lh->i_mean < old_i_mean);
}
-EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -169,7 +167,6 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
}
return 1;
}
-EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
int __init tfrc_li_init(void)
{
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6cc108afdc3..b7785b3581e 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -94,7 +94,6 @@ int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
*headp = entry;
return 0;
}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
{
@@ -109,7 +108,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
*headp = NULL;
}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
const ktime_t now)
@@ -127,7 +125,6 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
return rtt;
}
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
/*
@@ -172,7 +169,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
/* has the packet contained in skb been seen before? */
int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
@@ -189,7 +185,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
return 0;
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
@@ -390,7 +385,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
}
return is_new_loss;
}
-EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
{
@@ -412,7 +406,6 @@ out_free:
}
return -ENOBUFS;
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
{
@@ -424,7 +417,6 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
h->ring[i] = NULL;
}
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
/**
* tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
@@ -495,4 +487,3 @@ keep_ref_for_next_time:
return sample;
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 185916218e0..60c412ccfee 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,20 +1,18 @@
/*
- * TFRC: main module holding the pieces of the TFRC library together
+ * TFRC library initialisation
*
* Copyright (c) 2007 The University of Aberdeen, Scotland, UK
* Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
#include "tfrc.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
int tfrc_debug;
module_param(tfrc_debug, bool, 0644);
-MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
+MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
#endif
-static int __init tfrc_module_init(void)
+int __init tfrc_lib_init(void)
{
int rc = tfrc_li_init();
@@ -38,18 +36,9 @@ out:
return rc;
}
-static void __exit tfrc_module_exit(void)
+void __exit tfrc_lib_exit(void)
{
tfrc_rx_packet_history_exit();
tfrc_tx_packet_history_exit();
tfrc_li_exit();
}
-
-module_init(tfrc_module_init);
-module_exit(tfrc_module_exit);
-
-MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, "
- "Ian McDonald <ian.mcdonald@jandi.co.nz>, "
- "Arnaldo Carvalho de Melo <acme@redhat.com>");
-MODULE_DESCRIPTION("DCCP TFRC library");
-MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed9857527ac..e9720b14327 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -17,7 +17,8 @@
#include <linux/types.h>
#include <linux/math64.h>
#include "../../dccp.h"
-/* internal includes that this module exports: */
+
+/* internal includes that this library exports: */
#include "loss_interval.h"
#include "packet_history.h"
@@ -66,4 +67,12 @@ extern void tfrc_rx_packet_history_exit(void);
extern int tfrc_li_init(void);
extern void tfrc_li_exit(void);
+
+#ifdef CONFIG_IP_DCCP_TFRC_LIB
+extern int tfrc_lib_init(void);
+extern void tfrc_lib_exit(void);
+#else
+#define tfrc_lib_init() (0)
+#define tfrc_lib_exit()
+#endif
#endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 2f20a29cffe..c5d3a9e5a5a 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -659,8 +659,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
return scaled_div32(result, f);
}
-EXPORT_SYMBOL_GPL(tfrc_calc_x);
-
/**
* tfrc_calc_x_reverse_lookup - try to find p given f(p)
*
@@ -693,5 +691,3 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
index = tfrc_binsearch(fvalue, 0);
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
-
-EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b4bc6e095a0..f2230fc168e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
extern struct inet_hashinfo dccp_hashinfo;
-extern atomic_t dccp_orphan_count;
+extern struct percpu_counter dccp_orphan_count;
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
@@ -98,9 +98,6 @@ extern int sysctl_dccp_retries2;
extern int sysctl_dccp_feat_sequence_window;
extern int sysctl_dccp_feat_rx_ccid;
extern int sysctl_dccp_feat_tx_ccid;
-extern int sysctl_dccp_feat_ack_ratio;
-extern int sysctl_dccp_feat_send_ack_vector;
-extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
extern int sysctl_dccp_sync_ratelimit;
@@ -252,7 +249,8 @@ extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
-extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
+extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+ struct sk_buff const *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
@@ -434,13 +432,18 @@ static inline int dccp_ack_pending(const struct sock *sk)
{
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
-#ifdef CONFIG_IP_DCCP_ACKVEC
- (dccp_msk(sk)->dccpms_send_ack_vector &&
+ (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-#endif
inet_csk_ack_scheduled(sk);
}
+extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
+extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+ struct sk_buff *skb);
+extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
+extern void dccp_feat_list_purge(struct list_head *fn_list);
+
extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
extern int dccp_insert_option_elapsed_time(struct sock *sk,
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index d8a3509b26f..b21f261da75 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -29,11 +29,14 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_backoff = icsk->icsk_backoff;
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
- if (dccp_msk(sk)->dccpms_send_ack_vector)
+ if (dp->dccps_hc_rx_ackvec != NULL)
info->tcpi_options |= TCPI_OPT_SACK;
- ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
- ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
+ if (dp->dccps_hc_rx_ccid != NULL)
+ ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
+
+ if (dp->dccps_hc_tx_ccid != NULL)
+ ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
}
static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -45,7 +48,7 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
dccp_get_info(sk, _info);
}
-static struct inet_diag_handler dccp_diag_handler = {
+static const struct inet_diag_handler dccp_diag_handler = {
.idiag_hashinfo = &dccp_hashinfo,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = DCCPDIAG_GETSOCK,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 933a0ecf8d4..4152308958a 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1,11 +1,17 @@
/*
* net/dccp/feat.c
*
- * An implementation of the DCCP protocol
- * Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ *
+ * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ * Rewrote from scratch, some bits from earlier code by
+ * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
*
* ASSUMPTIONS
* -----------
+ * o Feature negotiation is coordinated with connection setup (as in TCP), wild
+ * changes of parameters of an established connection are not supported.
* o All currently known SP features have 1-byte quantities. If in the future
* extensions of RFCs 4340..42 define features with item lengths larger than
* one byte, a feature-specific extension of the code will be required.
@@ -15,597 +21,1185 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
#include <linux/module.h>
-
#include "ccid.h"
#include "feat.h"
-#define DCCP_FEAT_SP_NOAGREE (-123)
-
-int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
- u8 *val, u8 len, gfp_t gfp)
-{
- struct dccp_opt_pend *opt;
-
- dccp_feat_debug(type, feature, *val);
-
- if (len > 3) {
- DCCP_WARN("invalid length %d\n", len);
- return -EINVAL;
- }
- /* XXX add further sanity checks */
-
- /* check if that feature is already being negotiated */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- /* ok we found a negotiation for this option already */
- if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
- dccp_pr_debug("Replacing old\n");
- /* replace */
- BUG_ON(opt->dccpop_val == NULL);
- kfree(opt->dccpop_val);
- opt->dccpop_val = val;
- opt->dccpop_len = len;
- opt->dccpop_conf = 0;
- return 0;
- }
- }
-
- /* negotiation for a new feature */
- opt = kmalloc(sizeof(*opt), gfp);
- if (opt == NULL)
- return -ENOMEM;
-
- opt->dccpop_type = type;
- opt->dccpop_feat = feature;
- opt->dccpop_len = len;
- opt->dccpop_val = val;
- opt->dccpop_conf = 0;
- opt->dccpop_sc = NULL;
-
- BUG_ON(opt->dccpop_val == NULL);
-
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_feat_change);
-
-static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
+/*
+ * Feature activation handlers.
+ *
+ * These all use a u64 argument, to provide enough room for NN/SP features. At
+ * this stage the negotiated values have been checked to be within their range.
+ */
+static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
- /* figure out if we are changing our CCID or the peer's */
- const int rx = type == DCCPO_CHANGE_R;
- const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
- struct ccid *new_ccid;
+ struct ccid *new_ccid = ccid_new(ccid, sk, rx);
- /* Check if nothing is being changed. */
- if (ccid_nr == new_ccid_nr)
- return 0;
-
- new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
if (new_ccid == NULL)
return -ENOMEM;
if (rx) {
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
dp->dccps_hc_rx_ccid = new_ccid;
- dmsk->dccpms_rx_ccid = new_ccid_nr;
} else {
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_tx_ccid = new_ccid;
- dmsk->dccpms_tx_ccid = new_ccid_nr;
}
+ return 0;
+}
+static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
+{
+ if (!rx)
+ dccp_msk(sk)->dccpms_sequence_window = seq_win;
return 0;
}
-static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
+static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
{
- dccp_feat_debug(type, feat, val);
+ if (rx)
+ dccp_sk(sk)->dccps_r_ack_ratio = ratio;
+ else
+ dccp_sk(sk)->dccps_l_ack_ratio = ratio;
+ return 0;
+}
- switch (feat) {
- case DCCPF_CCID:
- return dccp_feat_update_ccid(sk, type, val);
- default:
- dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
- dccp_feat_typename(type), feat);
- break;
+static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ if (rx) {
+ if (enable && dp->dccps_hc_rx_ackvec == NULL) {
+ dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any());
+ if (dp->dccps_hc_rx_ackvec == NULL)
+ return -ENOMEM;
+ } else if (!enable) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
}
return 0;
}
-static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
- u8 *rpref, u8 rlen)
+static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
{
- struct dccp_sock *dp = dccp_sk(sk);
- u8 *spref, slen, *res = NULL;
- int i, j, rc, agree = 1;
+ if (!rx)
+ dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
+ return 0;
+}
- BUG_ON(rpref == NULL);
+/*
+ * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that
+ * `rx' holds when the sending peer informs about his partial coverage via a
+ * ChangeR() option. In the other case, we are the sender and the receiver
+ * announces its coverage via ChangeL() options. The policy here is to honour
+ * such communication by enabling the corresponding partial coverage - but only
+ * if it has not been set manually before; the warning here means that all
+ * packets will be dropped.
+ */
+static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
- /* check if we are the black sheep */
- if (dp->dccps_role == DCCP_ROLE_CLIENT) {
- spref = rpref;
- slen = rlen;
- rpref = opt->dccpop_val;
- rlen = opt->dccpop_len;
- } else {
- spref = opt->dccpop_val;
- slen = opt->dccpop_len;
+ if (rx)
+ dp->dccps_pcrlen = cscov;
+ else {
+ if (dp->dccps_pcslen == 0)
+ dp->dccps_pcslen = cscov;
+ else if (cscov > dp->dccps_pcslen)
+ DCCP_WARN("CsCov %u too small, peer requires >= %u\n",
+ dp->dccps_pcslen, (u8)cscov);
}
- /*
- * Now we have server preference list in spref and client preference in
- * rpref
- */
- BUG_ON(spref == NULL);
- BUG_ON(rpref == NULL);
+ return 0;
+}
- /* FIXME sanity check vals */
+static const struct {
+ u8 feat_num; /* DCCPF_xxx */
+ enum dccp_feat_type rxtx; /* RX or TX */
+ enum dccp_feat_type reconciliation; /* SP or NN */
+ u8 default_value; /* as in 6.4 */
+ int (*activation_hdlr)(struct sock *sk, u64 val, bool rx);
+/*
+ * Lookup table for location and type of features (from RFC 4340/4342)
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ * | Feature | Location | Reconc. | Initial | Section |
+ * | | RX | TX | SP | NN | Value | Reference |
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ * | DCCPF_CCID | | X | X | | 2 | 10 |
+ * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 |
+ * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 |
+ * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 |
+ * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 |
+ * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 |
+ * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 |
+ * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 |
+ * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 |
+ * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 |
+ * +--------------------------+----+-----+----+----+---------+-----------+
+ */
+} dccp_feat_table[] = {
+ { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid },
+ { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL },
+ { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win },
+ { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL },
+ { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio},
+ { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec },
+ { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp },
+ { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov},
+ { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL },
+ { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL },
+};
+#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table)
+
+/**
+ * dccp_feat_index - Hash function to map feature number into array position
+ * Returns consecutive array index or -1 if the feature is not understood.
+ */
+static int dccp_feat_index(u8 feat_num)
+{
+ /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */
+ if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM)
+ return feat_num - 1;
- /* Are values in any order? XXX Lame "algorithm" here */
- for (i = 0; i < slen; i++) {
- for (j = 0; j < rlen; j++) {
- if (spref[i] == rpref[j]) {
- res = &spref[i];
- break;
- }
- }
- if (res)
- break;
+ /*
+ * Other features: add cases for new feature types here after adding
+ * them to the above table.
+ */
+ switch (feat_num) {
+ case DCCPF_SEND_LEV_RATE:
+ return DCCP_FEAT_SUPPORTED_MAX - 1;
}
+ return -1;
+}
- /* we didn't agree on anything */
- if (res == NULL) {
- /* confirm previous value */
- switch (opt->dccpop_feat) {
- case DCCPF_CCID:
- /* XXX did i get this right? =P */
- if (opt->dccpop_type == DCCPO_CHANGE_L)
- res = &dccp_msk(sk)->dccpms_tx_ccid;
- else
- res = &dccp_msk(sk)->dccpms_rx_ccid;
- break;
+static u8 dccp_feat_type(u8 feat_num)
+{
+ int idx = dccp_feat_index(feat_num);
- default:
- DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
- /* XXX implement res */
- return -EFAULT;
- }
+ if (idx < 0)
+ return FEAT_UNKNOWN;
+ return dccp_feat_table[idx].reconciliation;
+}
- dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
- agree = 0; /* this is used for mandatory options... */
- }
+static int dccp_feat_default_value(u8 feat_num)
+{
+ int idx = dccp_feat_index(feat_num);
+ /*
+ * There are no default values for unknown features, so encountering a
+ * negative index here indicates a serious problem somewhere else.
+ */
+ DCCP_BUG_ON(idx < 0);
- /* need to put result and our preference list */
- rlen = 1 + opt->dccpop_len;
- rpref = kmalloc(rlen, GFP_ATOMIC);
- if (rpref == NULL)
- return -ENOMEM;
+ return idx < 0 ? 0 : dccp_feat_table[idx].default_value;
+}
+
+static int __dccp_feat_activate(struct sock *sk, const int idx,
+ const bool is_local, dccp_feat_val const *fval)
+{
+ bool rx;
+ u64 val;
- *rpref = *res;
- memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
+ if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX)
+ return -1;
+ if (dccp_feat_table[idx].activation_hdlr == NULL)
+ return 0;
- /* put it in the "confirm queue" */
- if (opt->dccpop_sc == NULL) {
- opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
- if (opt->dccpop_sc == NULL) {
- kfree(rpref);
- return -ENOMEM;
+ if (fval == NULL) {
+ val = dccp_feat_table[idx].default_value;
+ } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) {
+ if (fval->sp.vec == NULL) {
+ /*
+ * This can happen when an empty Confirm is sent
+ * for an SP (i.e. known) feature. In this case
+ * we would be using the default anyway.
+ */
+ DCCP_CRIT("Feature #%d undefined: using default", idx);
+ val = dccp_feat_table[idx].default_value;
+ } else {
+ val = fval->sp.vec[0];
}
} else {
- /* recycle the confirm slot */
- BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
- kfree(opt->dccpop_sc->dccpoc_val);
- dccp_pr_debug("recycling confirm slot\n");
+ val = fval->nn;
}
- memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
- opt->dccpop_sc->dccpoc_val = rpref;
- opt->dccpop_sc->dccpoc_len = rlen;
+ /* Location is RX if this is a local-RX or remote-TX feature */
+ rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
- /* update the option on our side [we are about to send the confirm] */
- rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
- if (rc) {
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
- opt->dccpop_sc = NULL;
- return rc;
- }
+ return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
+}
- dccp_pr_debug("Will confirm %d\n", *rpref);
+/* Returns 1 for the features marked "Req'd" in RFC 4340, 6.4, else 0 */
+static inline int dccp_feat_must_be_understood(u8 feat_num)
+{
+ switch (feat_num) {
+ case DCCPF_CCID:
+ case DCCPF_SHORT_SEQNOS:
+ case DCCPF_SEQUENCE_WINDOW:
+ return 1;
+ default:
+ return 0;
+ }
+}
- /* say we want to change to X but we just got a confirm X, suppress our
- * change
- */
- if (!opt->dccpop_conf) {
- if (*opt->dccpop_val == *res)
- opt->dccpop_conf = 1;
- dccp_pr_debug("won't ask for change of same feature\n");
+/*
+ * Copy constructor for SP values: duplicates @len bytes of @val into @fval.
+ * @fval must not already contain allocated memory. A zero @len only records
+ * the length. Returns 0 on success, -ENOBUFS (with length reset to 0) if the
+ * duplicate could not be allocated.
+ */
+static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
+{
+ fval->sp.len = len;
+ if (fval->sp.len == 0)
+ return 0;
+
+ fval->sp.vec = kmemdup(val, len, gfp_any());
+ if (fval->sp.vec != NULL)
+ return 0;
+
+ fval->sp.len = 0;
+ return -ENOBUFS;
+}
- return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
+static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
+{
+ if (unlikely(val == NULL))
+ return;
+ if (dccp_feat_type(feat_num) == FEAT_SP)
+ kfree(val->sp.vec);
+ memset(val, 0, sizeof(*val));
}
-static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+static struct dccp_feat_entry *
+ dccp_feat_clone_entry(struct dccp_feat_entry const *original)
{
- struct dccp_minisock *dmsk = dccp_msk(sk);
- struct dccp_opt_pend *opt;
- int rc = 1;
- u8 t;
+ struct dccp_feat_entry *new;
+ u8 type = dccp_feat_type(original->feat_num);
- /*
- * We received a CHANGE. We gotta match it against our own preference
- * list. If we got a CHANGE_R it means it's a change for us, so we need
- * to compare our CHANGE_L list.
- */
- if (type == DCCPO_CHANGE_L)
- t = DCCPO_CHANGE_R;
- else
- t = DCCPO_CHANGE_L;
+ if (type == FEAT_UNKNOWN)
+ return NULL;
- /* find our preference list for this feature */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (opt->dccpop_type != t || opt->dccpop_feat != feature)
- continue;
+ new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
+ if (new == NULL)
+ return NULL;
+
+ if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
+ original->val.sp.vec,
+ original->val.sp.len)) {
+ kfree(new);
+ return NULL;
+ }
+ return new;
+}
- /* find the winner from the two preference lists */
- rc = dccp_feat_reconcile(sk, opt, val, len);
- break;
+static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
+{
+ if (entry != NULL) {
+ dccp_feat_val_destructor(entry->feat_num, &entry->val);
+ kfree(entry);
}
+}
- /* We didn't deal with the change. This can happen if we have no
- * preference list for the feature. In fact, it just shouldn't
- * happen---if we understand a feature, we should have a preference list
- * with at least the default value.
- */
- BUG_ON(rc == 1);
+/*
+ * List management functions
+ *
+ * Feature negotiation lists rely on and maintain the following invariants:
+ * - each feat_num in the list is known, i.e. we know its type and default value
+ * - each feat_num/is_local combination is unique (old entries are overwritten)
+ * - SP values are always freshly allocated
+ * - list is sorted in increasing order of feature number (faster lookup)
+ */
+static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
+ u8 feat_num, bool is_local)
+{
+ struct dccp_feat_entry *entry;
- return rc;
+ list_for_each_entry(entry, fn_list, node) {
+ if (entry->feat_num == feat_num && entry->is_local == is_local)
+ return entry;
+ else if (entry->feat_num > feat_num)
+ break;
+ }
+ return NULL;
}
-static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+/**
+ * dccp_feat_entry_new - Central list update routine (called by all others)
+ * @head: list to add to
+ * @feat: feature number
+ * @local: whether the local (1) or remote feature with number @feat is meant
+ * This is the only constructor and serves to ensure the above invariants.
+ */
+static struct dccp_feat_entry *
+ dccp_feat_entry_new(struct list_head *head, u8 feat, bool local)
{
- struct dccp_opt_pend *opt;
- struct dccp_minisock *dmsk = dccp_msk(sk);
- u8 *copy;
- int rc;
+ struct dccp_feat_entry *entry;
+
+ list_for_each_entry(entry, head, node)
+ if (entry->feat_num == feat && entry->is_local == local) {
+ dccp_feat_val_destructor(entry->feat_num, &entry->val);
+ return entry;
+ } else if (entry->feat_num > feat) {
+ head = &entry->node;
+ break;
+ }
- /* NN features must be Change L (sec. 6.3.2) */
- if (type != DCCPO_CHANGE_L) {
- dccp_pr_debug("received %s for NN feature %d\n",
- dccp_feat_typename(type), feature);
- return -EFAULT;
+ entry = kmalloc(sizeof(*entry), gfp_any());
+ if (entry != NULL) {
+ entry->feat_num = feat;
+ entry->is_local = local;
+ list_add_tail(&entry->node, head);
}
+ return entry;
+}
- /* XXX sanity check opt val */
+/**
+ * dccp_feat_push_change - Add/overwrite a Change option in the list
+ * @fn_list: feature-negotiation list to update
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat is meant
+ * @mandatory: whether to use Mandatory feature negotiation options
+ * @fval: pointer to NN/SP value to be inserted (will be copied)
+ */
+static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
+ u8 mandatory, dccp_feat_val *fval)
+{
+ struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
- /* copy option so we can confirm it */
- opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
- if (opt == NULL)
+ if (new == NULL)
return -ENOMEM;
- copy = kmemdup(val, len, GFP_ATOMIC);
- if (copy == NULL) {
- kfree(opt);
- return -ENOMEM;
- }
+ new->feat_num = feat;
+ new->is_local = local;
+ new->state = FEAT_INITIALISING;
+ new->needs_confirm = 0;
+ new->empty_confirm = 0;
+ new->val = *fval;
+ new->needs_mandatory = mandatory;
- opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
- opt->dccpop_feat = feature;
- opt->dccpop_val = copy;
- opt->dccpop_len = len;
+ return 0;
+}
- /* change feature */
- rc = dccp_feat_update(sk, type, feature, *val);
- if (rc) {
- kfree(opt->dccpop_val);
- kfree(opt);
- return rc;
- }
+/**
+ * dccp_feat_push_confirm - Add a Confirm entry to the FN list
+ * @fn_list: feature-negotiation list to add to
+ * @feat: one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat is being confirmed
+ * @fval: pointer to NN/SP value to be inserted or NULL
+ * Returns 0 on success, a Reset code for further processing otherwise.
+ */
+static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
+ dccp_feat_val *fval)
+{
+ struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
- dccp_feat_debug(type, feature, *copy);
+ if (new == NULL)
+ return DCCP_RESET_CODE_TOO_BUSY;
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+ new->feat_num = feat;
+ new->is_local = local;
+ new->state = FEAT_STABLE; /* transition in 6.6.2 */
+ new->needs_confirm = 1;
+ new->empty_confirm = (fval == NULL);
+ new->val.nn = 0; /* zeroes the whole structure */
+ if (!new->empty_confirm)
+ new->val = *fval;
+ new->needs_mandatory = 0;
return 0;
}
-static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
- u8 type, u8 feature)
+static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local)
{
- /* XXX check if other confirms for that are queued and recycle slot */
- struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
+ return dccp_feat_push_confirm(fn_list, feat, local, NULL);
+}
- if (opt == NULL) {
- /* XXX what do we do? Ignoring should be fine. It's a change
- * after all =P
- */
- return;
- }
+static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry)
+{
+ list_del(&entry->node);
+ dccp_feat_entry_destructor(entry);
+}
- switch (type) {
- case DCCPO_CHANGE_L:
- opt->dccpop_type = DCCPO_CONFIRM_R;
- break;
- case DCCPO_CHANGE_R:
- opt->dccpop_type = DCCPO_CONFIRM_L;
- break;
- default:
- DCCP_WARN("invalid type %d\n", type);
- kfree(opt);
- return;
+void dccp_feat_list_purge(struct list_head *fn_list)
+{
+ struct dccp_feat_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, fn_list, node)
+ dccp_feat_entry_destructor(entry);
+ INIT_LIST_HEAD(fn_list);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
+
+/* generate @to as full clone of @from - @to must not contain any nodes */
+int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
+{
+ struct dccp_feat_entry *entry, *new;
+
+ INIT_LIST_HEAD(to);
+ list_for_each_entry(entry, from, node) {
+ new = dccp_feat_clone_entry(entry);
+ if (new == NULL)
+ goto cloning_failed;
+ list_add_tail(&new->node, to);
}
- opt->dccpop_feat = feature;
- opt->dccpop_val = NULL;
- opt->dccpop_len = 0;
+ return 0;
- /* change feature */
- dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
+cloning_failed:
+ dccp_feat_list_purge(to);
+ return -ENOMEM;
+}
+
+/**
+ * dccp_feat_valid_nn_length - Enforce length constraints on NN options
+ * Returns the option value length used for @feat_num, which is between 0 and
+ * %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only, incoming options are
+ * accepted as long as their values are valid.
+ */
+static u8 dccp_feat_valid_nn_length(u8 feat_num)
+{
+ switch (feat_num) {
+ case DCCPF_ACK_RATIO: /* RFC 4340, 11.3 and 6.6.8 */
+ return 2;
+ case DCCPF_SEQUENCE_WINDOW: /* RFC 4340, 7.5.2 and 6.5 */
+ return 6;
+ default:
+ return 0;
+ }
+}
- list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
+{
+ switch (feat_num) {
+ case DCCPF_ACK_RATIO:
+ return val <= DCCPF_ACK_RATIO_MAX;
+ case DCCPF_SEQUENCE_WINDOW:
+ return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX;
+ }
+ return 0; /* feature unknown - so we can't tell */
}
-static void dccp_feat_flush_confirm(struct sock *sk)
+/* check that SP values are within the ranges defined in RFC 4340 */
+static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val)
{
- struct dccp_minisock *dmsk = dccp_msk(sk);
- /* Check if there is anything to confirm in the first place */
- int yes = !list_empty(&dmsk->dccpms_conf);
+ switch (feat_num) {
+ case DCCPF_CCID:
+ return val == DCCPC_CCID2 || val == DCCPC_CCID3;
+ /* Type-check Boolean feature values: */
+ case DCCPF_SHORT_SEQNOS:
+ case DCCPF_ECN_INCAPABLE:
+ case DCCPF_SEND_ACK_VECTOR:
+ case DCCPF_SEND_NDP_COUNT:
+ case DCCPF_DATA_CHECKSUM:
+ case DCCPF_SEND_LEV_RATE:
+ return val < 2;
+ case DCCPF_MIN_CSUM_COVER:
+ return val < 16;
+ }
+ return 0; /* feature unknown */
+}
- if (!yes) {
- struct dccp_opt_pend *opt;
+/* Returns 1 if @sp_list is non-empty and every entry is valid for @feat_num */
+static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
+{
+ u8 i;
+
+ if (sp_list == NULL || sp_len < 1)
+ return 0;
+ for (i = 0; i < sp_len; i++)
+ if (!dccp_feat_is_valid_sp_val(feat_num, sp_list[i]))
+ return 0;
+ return 1;
+}
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (opt->dccpop_conf) {
- yes = 1;
- break;
+/**
+ * dccp_feat_insert_opts - Generate FN options from current list state
+ * @skb: next sk_buff to be sent to the peer
+ * @dp: for client during handshake and general negotiation
+ * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND)
+ */
+int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
+ struct sk_buff *skb)
+{
+ struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+ struct dccp_feat_entry *pos, *next;
+ u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN];
+ bool rpt;
+
+ /* put entries into @skb in the order they appear in the list */
+ list_for_each_entry_safe_reverse(pos, next, fn, node) {
+ opt = dccp_feat_genopt(pos);
+ type = dccp_feat_type(pos->feat_num);
+ rpt = false;
+
+ if (pos->empty_confirm) {
+ len = 0;
+ ptr = NULL;
+ } else {
+ if (type == FEAT_SP) {
+ len = pos->val.sp.len;
+ ptr = pos->val.sp.vec;
+ rpt = pos->needs_confirm;
+ } else if (type == FEAT_NN) {
+ len = dccp_feat_valid_nn_length(pos->feat_num);
+ ptr = nn_in_nbo;
+ dccp_encode_value_var(pos->val.nn, ptr, len);
+ } else {
+ DCCP_BUG("unknown feature %u", pos->feat_num);
+ return -1;
}
}
+
+ if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
+ return -1;
+ if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
+ return -1;
+ /*
+ * Enter CHANGING after transmitting the Change option (6.6.2).
+ */
+ if (pos->state == FEAT_INITIALISING)
+ pos->state = FEAT_CHANGING;
}
+ return 0;
+}
- if (!yes)
- return;
+/**
+ * __feat_register_nn - Register new NN value on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an NN feature from %dccp_feature_numbers
+ * @mandatory: use Mandatory option if 1
+ * @nn_val: value to register (restricted to 4 bytes)
+ * Note that NN features are local by definition (RFC 4340, 6.3.2).
+ */
+static int __feat_register_nn(struct list_head *fn, u8 feat,
+ u8 mandatory, u64 nn_val)
+{
+ dccp_feat_val fval = { .nn = nn_val };
+
+ if (dccp_feat_type(feat) != FEAT_NN ||
+ !dccp_feat_is_valid_nn_val(feat, nn_val))
+ return -EINVAL;
- /* OK there is something to confirm... */
- /* XXX check if packet is in flight? Send delayed ack?? */
- if (sk->sk_state == DCCP_OPEN)
- dccp_send_ack(sk);
+ /* Don't bother with default values, they will be activated anyway. */
+ if (nn_val - (u64)dccp_feat_default_value(feat) == 0)
+ return 0;
+
+ return dccp_feat_push_change(fn, feat, 1, mandatory, &fval);
}
-int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+/**
+ * __feat_register_sp - Register new SP value/list on socket
+ * @fn: feature-negotiation list to register with
+ * @feat: an SP feature from %dccp_feature_numbers
+ * @is_local: whether the local (1) or the remote (0) @feat is meant
+ * @mandatory: use Mandatory option if 1
+ * @sp_val: SP value followed by optional preference list
+ * @sp_len: length of @sp_val in bytes
+ * Returns 0 on success, -EINVAL if @feat is not an SP feature or the list is
+ * not valid, -EOPNOTSUPP if a listed CCID is not supported, and -ENOMEM if
+ * the value could not be copied or no list entry could be obtained.
+ */
+static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
+ u8 mandatory, u8 const *sp_val, u8 sp_len)
{
- int rc;
+ dccp_feat_val fval;
- dccp_feat_debug(type, feature, *val);
+ if (dccp_feat_type(feat) != FEAT_SP ||
+ !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
+ return -EINVAL;
- /* figure out if it's SP or NN feature */
- switch (feature) {
- /* deal with SP features */
- case DCCPF_CCID:
- rc = dccp_feat_sp(sk, type, feature, val, len);
- break;
+ /* Avoid negotiating alien CCIDs by only advertising supported ones */
+ if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
+ return -EOPNOTSUPP;
- /* deal with NN features */
- case DCCPF_ACK_RATIO:
- rc = dccp_feat_nn(sk, type, feature, val, len);
- break;
+ if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
+ return -ENOMEM;
- /* XXX implement other features */
- default:
- dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
- dccp_feat_typename(type), feature);
- rc = -EFAULT;
- break;
- }
+ /*
+ * The list entry takes over the cloned vector only on success: on
+ * failure dccp_feat_push_change() has not copied @fval, so the clone
+ * must be released here to avoid leaking it.
+ */
+ if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) {
+ kfree(fval.sp.vec);
+ return -ENOMEM;
+ }
+ return 0;
+}
- /* check if there were problems changing features */
- if (rc) {
- /* If we don't agree on SP, we sent a confirm for old value.
- * However we propagate rc to caller in case option was
- * mandatory
+/**
+ * dccp_feat_register_sp - Register requests to change SP feature values
+ * @sk: client or listening socket
+ * @feat: one of %dccp_feature_numbers
+ * @is_local: whether the local (1) or remote (0) @feat is meant
+ * @list: array of preferred values, in descending order of preference
+ * @len: length of @list in bytes
+ */
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len)
+{ /* any changes must be registered before establishing the connection */
+ if (sk->sk_state != DCCP_CLOSED)
+ return -EISCONN;
+ if (dccp_feat_type(feat) != FEAT_SP)
+ return -EINVAL;
+ return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
+ 0, list, len);
+}
+
+/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
+int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
+{
+ /* any changes must be registered before establishing the connection */
+ if (sk->sk_state != DCCP_CLOSED)
+ return -EISCONN;
+ if (dccp_feat_type(feat) != FEAT_NN)
+ return -EINVAL;
+ return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
+}
+
+/*
+ * Tracking features whose values depend on the choice of CCID
+ *
+ * This is designed with an extension in mind so that a list walk could be done
+ * before activating any features. However, since the existing framework has
+ * worked satisfactorily up until now, the automatic verification is left open.
+ * When adding new CCIDs, add a corresponding dependency table here.
+ */
+static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
+{
+ static const struct ccid_dependency ccid2_dependencies[2][2] = {
+ /*
+ * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX
+ * feature and Send Ack Vector is an RX feature, `is_local'
+ * needs to be reversed.
*/
- if (rc != DCCP_FEAT_SP_NOAGREE)
- dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
+ { /* Dependencies of the receiver-side (remote) CCID2 */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = true,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ },
+ { /* Dependencies of the sender-side (local) CCID2 */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ }
+ };
+ static const struct ccid_dependency ccid3_dependencies[2][5] = {
+ { /*
+ * Dependencies of the receiver-side CCID3
+ */
+ { /* locally disable Ack Vectors */
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 0
+ },
+ { /* see below why Send Loss Event Rate is on */
+ .dependent_feat = DCCPF_SEND_LEV_RATE,
+ .is_local = true,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { /* NDP Count is needed as per RFC 4342, 6.1.1 */
+ .dependent_feat = DCCPF_SEND_NDP_COUNT,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { 0, 0, 0, 0 },
+ },
+ { /*
+ * CCID3 at the TX side: we request that the HC-receiver
+ * will not send Ack Vectors (they will be ignored, so
+ * Mandatory is not set); we enable Send Loss Event Rate
+ * (Mandatory since the implementation does not support
+ * the Loss Intervals option of RFC 4342, 8.6).
+ * The last two options are for peer's information only.
+ */
+ {
+ .dependent_feat = DCCPF_SEND_ACK_VECTOR,
+ .is_local = false,
+ .is_mandatory = false,
+ .val = 0
+ },
+ {
+ .dependent_feat = DCCPF_SEND_LEV_RATE,
+ .is_local = false,
+ .is_mandatory = true,
+ .val = 1
+ },
+ { /* this CCID does not support Ack Ratio */
+ .dependent_feat = DCCPF_ACK_RATIO,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 0
+ },
+ { /* tell receiver we are sending NDP counts */
+ .dependent_feat = DCCPF_SEND_NDP_COUNT,
+ .is_local = true,
+ .is_mandatory = false,
+ .val = 1
+ },
+ { 0, 0, 0, 0 }
+ }
+ };
+ switch (ccid) {
+ case DCCPC_CCID2:
+ return ccid2_dependencies[is_local];
+ case DCCPC_CCID3:
+ return ccid3_dependencies[is_local];
+ default:
+ return NULL;
}
+}
- /* generate the confirm [if required] */
- dccp_feat_flush_confirm(sk);
-
+/**
+ * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID
+ * @fn: feature-negotiation list to update
+ * @id: CCID number to track
+ * @is_local: whether TX CCID (1) or RX CCID (0) is meant
+ * This function needs to be called after registering all other features.
+ */
+static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
+{
+ const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local);
+ int i, rc = (table == NULL);
+
+ for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++)
+ if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP)
+ rc = __feat_register_sp(fn, table[i].dependent_feat,
+ table[i].is_local,
+ table[i].is_mandatory,
+ &table[i].val, 1);
+ else
+ rc = __feat_register_nn(fn, table[i].dependent_feat,
+ table[i].is_mandatory,
+ table[i].val);
return rc;
}
-EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
+/**
+ * dccp_feat_finalise_settings - Finalise settings before starting negotiation
+ * @dp: client or listening socket (settings will be inherited)
+ * This is called after all registrations (socket initialisation, sysctls, and
+ * sockopt calls), and before sending the first packet containing Change options
+ * (ie. client-Request or server-Response), to ensure internal consistency.
+ */
+int dccp_feat_finalise_settings(struct dccp_sock *dp)
+{
+ struct list_head *fn = &dp->dccps_featneg;
+ struct dccp_feat_entry *entry;
+ int i = 2, ccids[2] = { -1, -1 };
-int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len)
+ /*
+ * Propagating CCIDs:
+ * 1) not useful to propagate CCID settings if this host advertises more
+ * than one CCID: the choice of CCID may still change - if this is
+ * the client, or if this is the server and the client sends
+ * singleton CCID values.
+ * 2) since propagate_ccid changes the list, we defer changing the
+ * sorted list until after the traversal.
+ */
+ list_for_each_entry(entry, fn, node)
+ if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1)
+ ccids[entry->is_local] = entry->val.sp.vec[0];
+ while (i--)
+ if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
+ return -1;
+ return 0;
+}
+
+/**
+ * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features
+ * It is the server which resolves the dependencies once the CCID has been
+ * fully negotiated. If no CCID has been negotiated, it uses the default CCID.
+ */
+int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
{
- u8 t;
- struct dccp_opt_pend *opt;
- struct dccp_minisock *dmsk = dccp_msk(sk);
- int found = 0;
- int all_confirmed = 1;
+ struct list_head *fn = &dreq->dreq_featneg;
+ struct dccp_feat_entry *entry;
+ u8 is_local, ccid;
- dccp_feat_debug(type, feature, *val);
+ for (is_local = 0; is_local <= 1; is_local++) {
+ entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local);
- /* locate our change request */
- switch (type) {
- case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
- case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
- default: DCCP_WARN("invalid type %d\n", type);
- return 1;
+ if (entry != NULL && !entry->empty_confirm)
+ ccid = entry->val.sp.vec[0];
+ else
+ ccid = dccp_feat_default_value(DCCPF_CCID);
+ if (dccp_feat_propagate_ccid(fn, ccid, is_local))
+ return -1;
}
- /* XXX sanity check feature value */
+ return 0;
+}
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- if (!opt->dccpop_conf && opt->dccpop_type == t &&
- opt->dccpop_feat == feature) {
- found = 1;
- dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
+/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
+static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
+{
+ u8 c, s;
- /* XXX do sanity check */
+ for (s = 0; s < slen; s++)
+ for (c = 0; c < clen; c++)
+ if (servlist[s] == clilist[c])
+ return servlist[s];
+ return -1;
+}
- opt->dccpop_conf = 1;
+/**
+ * dccp_feat_prefer - Move preferred entry to the start of array
+ * Reorder the @array_len elements in @array so that @preferred_value comes
+ * first. Returns >0 to indicate that @preferred_value does occur in @array.
+ */
+static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len)
+{
+ u8 i, does_occur = 0;
- /* We got a confirmation---change the option */
- dccp_feat_update(sk, opt->dccpop_type,
- opt->dccpop_feat, *val);
+ if (array != NULL) {
+ for (i = 0; i < array_len; i++)
+ if (array[i] == preferred_value) {
+ array[i] = array[0];
+ does_occur++;
+ }
+ if (does_occur)
+ array[0] = preferred_value;
+ }
+ return does_occur;
+}
- /* XXX check the return value of dccp_feat_update */
- break;
- }
+/**
+ * dccp_feat_reconcile - Reconcile SP preference lists
+ * @fv: SP list to reconcile into
+ * @arr: received SP preference list
+ * @len: length of @arr in bytes
+ * @is_server: whether this side is the server (and @fv is the server's list)
+ * @reorder: whether to reorder the list in @fv after reconciling with @arr
+ * If @reorder is set, > 0 is returned on success (reconciled list is in @fv)
+ * and 0 on failure (no shared entry); else the match or -1 (none) is returned.
+ */
+static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
+ bool is_server, bool reorder)
+{
+ int rc;
- if (!opt->dccpop_conf)
- all_confirmed = 0;
+ if (!fv->sp.vec || !arr) {
+ DCCP_CRIT("NULL feature value or array");
+ return 0;
}
- /* fix re-transmit timer */
- /* XXX gotta make sure that no option negotiation occurs during
- * connection shutdown. Consider that the CLOSEREQ is sent and timer is
- * on. if all options are confirmed it might kill timer which should
- * remain alive until close is received.
- */
- if (all_confirmed) {
- dccp_pr_debug("clear feat negotiation timer %p\n", sk);
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
- }
+ if (is_server)
+ rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len);
+ else
+ rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len);
- if (!found)
- dccp_pr_debug("%s(%d, ...) never requested\n",
- dccp_feat_typename(type), feature);
- return 0;
-}
+ if (!reorder)
+ return rc;
+ if (rc < 0)
+ return 0;
-EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
+ /*
+ * Reorder list: used for activating features and in dccp_insert_fn_opt.
+ */
+ return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
+}
-void dccp_feat_clean(struct dccp_minisock *dmsk)
+/**
+ * dccp_feat_change_recv - Process incoming ChangeL/R options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether the Change was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is the server (1) or the client (0)
+ */
+static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len, const bool server)
{
- struct dccp_opt_pend *opt, *next;
+ u8 defval, type = dccp_feat_type(feat);
+ const bool local = (opt == DCCPO_CHANGE_R);
+ struct dccp_feat_entry *entry;
+ dccp_feat_val fval;
+
+ if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */
+ goto unknown_feature_or_value;
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
- dccpop_node) {
- BUG_ON(opt->dccpop_val == NULL);
- kfree(opt->dccpop_val);
+ /*
+ * Negotiation of NN features: Change R is invalid, so there is no
+ * simultaneous negotiation; hence we do not look up in the list.
+ */
+ if (type == FEAT_NN) {
+ if (local || len > sizeof(fval.nn))
+ goto unknown_feature_or_value;
- if (opt->dccpop_sc != NULL) {
- BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
+ /* 6.3.2: "The feature remote MUST accept any valid value..." */
+ fval.nn = dccp_decode_value_var(val, len);
+ if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+ goto unknown_feature_or_value;
+
+ return dccp_feat_push_confirm(fn, feat, local, &fval);
+ }
+
+ /*
+ * Unidirectional/simultaneous negotiation of SP features (6.3.1)
+ */
+ entry = dccp_feat_list_lookup(fn, feat, local);
+ if (entry == NULL) {
+ /*
+ * No particular preferences have been registered. We deal with
+ * this situation by assuming that all valid values are equally
+ * acceptable, and apply the following checks:
+ * - if the peer's list is a singleton, we accept a valid value;
+ * - if we are the server, we first try to see if the peer (the
+ * client) advertises the default value. If yes, we use it,
+ * otherwise we accept the preferred value;
+ * - else if we are the client, we use the first list element.
+ */
+ if (dccp_feat_clone_sp_val(&fval, val, 1))
+ return DCCP_RESET_CODE_TOO_BUSY;
+
+ if (len > 1 && server) {
+ defval = dccp_feat_default_value(feat);
+ if (dccp_feat_preflist_match(&defval, 1, val, len) > -1)
+ fval.sp.vec[0] = defval;
+ } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) {
+ kfree(fval.sp.vec);
+ goto unknown_feature_or_value;
+ }
+
+ /* Treat unsupported CCIDs like invalid values */
+ if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) {
+ kfree(fval.sp.vec);
+ goto not_valid_or_not_known;
}
- kfree(opt);
+ return dccp_feat_push_confirm(fn, feat, local, &fval);
+
+ } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */
+ return 0;
}
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
- BUG_ON(opt == NULL);
- if (opt->dccpop_val != NULL)
- kfree(opt->dccpop_val);
- kfree(opt);
+ if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
+ entry->empty_confirm = 0;
+ } else if (is_mandatory) {
+ return DCCP_RESET_CODE_MANDATORY_ERROR;
+ } else if (entry->state == FEAT_INITIALISING) {
+ /*
+ * Failed simultaneous negotiation (server only): try to `save'
+ * the connection by checking whether entry contains the default
+ * value for @feat. If yes, send an empty Confirm to signal that
+ * the received Change was not understood - which implies using
+ * the default value.
+ * If this also fails, we use Reset as the last resort.
+ */
+ WARN_ON(!server);
+ defval = dccp_feat_default_value(feat);
+ if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
+ return DCCP_RESET_CODE_OPTION_ERROR;
+ entry->empty_confirm = 1;
}
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-}
+ entry->needs_confirm = 1;
+ entry->needs_mandatory = 0;
+ entry->state = FEAT_STABLE;
+ return 0;
+
+unknown_feature_or_value:
+ if (!is_mandatory)
+ return dccp_push_empty_confirm(fn, feat, local);
-EXPORT_SYMBOL_GPL(dccp_feat_clean);
+not_valid_or_not_known:
+ return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
+}
-/* this is to be called only when a listening sock creates its child. It is
- * assumed by the function---the confirm is not duplicated, but rather it is
- * "passed on".
+/**
+ * dccp_feat_confirm_recv - Process received Confirm options
+ * @fn: feature-negotiation list to update
+ * @is_mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: NN value or SP value/preference list
+ * @len: length of @val in bytes
+ * @server: whether this node is server (1) or client (0)
*/
-int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
+static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len, const bool server)
{
- struct dccp_minisock *olddmsk = dccp_msk(oldsk);
- struct dccp_minisock *newdmsk = dccp_msk(newsk);
- struct dccp_opt_pend *opt;
- int rc = 0;
+ u8 *plist, plen, type = dccp_feat_type(feat);
+ const bool local = (opt == DCCPO_CONFIRM_R);
+ struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
- INIT_LIST_HEAD(&newdmsk->dccpms_pending);
- INIT_LIST_HEAD(&newdmsk->dccpms_conf);
+ if (entry == NULL) { /* nothing queued: ignore or handle error */
+ if (is_mandatory && type == FEAT_UNKNOWN)
+ return DCCP_RESET_CODE_MANDATORY_ERROR;
- list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
- struct dccp_opt_pend *newopt;
- /* copy the value of the option */
- u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
+ if (!local && type == FEAT_NN) /* 6.3.2 */
+ goto confirmation_failed;
+ return 0;
+ }
- if (val == NULL)
- goto out_clean;
+ if (entry->state != FEAT_CHANGING) /* 6.6.2 */
+ return 0;
- newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
- if (newopt == NULL) {
- kfree(val);
- goto out_clean;
- }
+ if (len == 0) {
+ if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */
+ goto confirmation_failed;
+ /*
+ * Empty Confirm during connection setup: this means reverting
+ * to the `old' value, which in this case is the default. Since
+ * we handle default values automatically when no other values
+ * have been set, we revert to the old value by removing this
+ * entry from the list.
+ */
+ dccp_feat_list_pop(entry);
+ return 0;
+ }
- /* insert the option */
- newopt->dccpop_val = val;
- list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
+ if (type == FEAT_NN) {
+ if (len > sizeof(entry->val.nn))
+ goto confirmation_failed;
- /* XXX what happens with backlogs and multiple connections at
- * once...
- */
- /* the master socket no longer needs to worry about confirms */
- opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
+ if (entry->val.nn == dccp_decode_value_var(val, len))
+ goto confirmation_succeeded;
- /* reset state for a new socket */
- opt->dccpop_conf = 0;
+ DCCP_WARN("Bogus Confirm for non-existing value\n");
+ goto confirmation_failed;
}
- /* XXX not doing anything about the conf queue */
+ /*
+ * Parsing SP Confirms: the first element of @val is the preferred
+ * SP value which the peer confirms, the remainder depends on @len.
+ * Note that only the confirmed value needs to be a valid SP value.
+ */
+ if (!dccp_feat_is_valid_sp_val(feat, *val))
+ goto confirmation_failed;
+
+ if (len == 1) { /* peer didn't supply a preference list */
+ plist = val;
+ plen = len;
+ } else { /* preferred value + preference list */
+ plist = val + 1;
+ plen = len - 1;
+ }
-out:
- return rc;
+ /* Check whether the peer got the reconciliation right (6.6.8) */
+ if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) {
+ DCCP_WARN("Confirm selected the wrong value %u\n", *val);
+ return DCCP_RESET_CODE_OPTION_ERROR;
+ }
+ entry->val.sp.vec[0] = *val;
-out_clean:
- dccp_feat_clean(newdmsk);
- rc = -ENOMEM;
- goto out;
-}
+confirmation_succeeded:
+ entry->state = FEAT_STABLE;
+ return 0;
-EXPORT_SYMBOL_GPL(dccp_feat_clone);
+confirmation_failed:
+ DCCP_WARN("Confirmation failed\n");
+ return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
+}
-static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
- u8 *val, u8 len)
+/**
+ * dccp_feat_parse_options - Process Feature-Negotiation Options
+ * @sk: for general use and used by the client during connection setup
+ * @dreq: used by the server during connection setup
+ * @mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R
+ * @feat: one of %dccp_feature_numbers
+ * @val: value contents of @opt
+ * @len: length of @val in bytes
+ * Returns 0 on success, a Reset code for ending the connection otherwise.
+ */
+int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
+ u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len)
{
- int rc = -ENOMEM;
- u8 *copy = kmemdup(val, len, GFP_KERNEL);
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
+ bool server = false;
- if (copy != NULL) {
- rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
- if (rc)
- kfree(copy);
+ switch (sk->sk_state) {
+ /*
+ * Negotiation during connection setup
+ */
+ case DCCP_LISTEN:
+ server = true; /* fall through */
+ case DCCP_REQUESTING:
+ switch (opt) {
+ case DCCPO_CHANGE_L:
+ case DCCPO_CHANGE_R:
+ return dccp_feat_change_recv(fn, mandatory, opt, feat,
+ val, len, server);
+ case DCCPO_CONFIRM_R:
+ case DCCPO_CONFIRM_L:
+ return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
+ val, len, server);
+ }
}
- return rc;
+ return 0; /* ignore FN options in all other states */
}
-int dccp_feat_init(struct dccp_minisock *dmsk)
+int dccp_feat_init(struct sock *sk)
{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
int rc;
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-
- /* CCID L */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
- &dmsk->dccpms_tx_ccid, 1);
- if (rc)
- goto out;
-
- /* CCID R */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
- &dmsk->dccpms_rx_ccid, 1);
- if (rc)
- goto out;
+ INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */
+ INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */
/* Ack ratio */
- rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
- &dmsk->dccpms_ack_ratio, 1);
-out:
+ rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
+ dp->dccps_l_ack_ratio);
return rc;
}
EXPORT_SYMBOL_GPL(dccp_feat_init);
+int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_feat_entry *cur, *next;
+ int idx;
+ dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = {
+ [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL }
+ };
+
+ list_for_each_entry(cur, fn_list, node) {
+ /*
+ * An empty Confirm means that either an unknown feature type
+ * or an invalid value was present. In the first case there is
+ * nothing to activate, in the other the default value is used.
+ */
+ if (cur->empty_confirm)
+ continue;
+
+ idx = dccp_feat_index(cur->feat_num);
+ if (idx < 0) {
+ DCCP_BUG("Unknown feature %u", cur->feat_num);
+ goto activation_failed;
+ }
+ if (cur->state != FEAT_STABLE) {
+ DCCP_CRIT("Negotiation of %s %u failed in state %u",
+ cur->is_local ? "local" : "remote",
+ cur->feat_num, cur->state);
+ goto activation_failed;
+ }
+ fvals[idx][cur->is_local] = &cur->val;
+ }
+
+ /*
+ * Activate in decreasing order of index, so that the CCIDs are always
+ * activated as the last feature. This avoids the case where a CCID
+ * relies on the initialisation of one or more features that it depends
+ * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features).
+ */
+ for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;)
+ if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) ||
+ __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) {
+ DCCP_CRIT("Could not activate %d", idx);
+ goto activation_failed;
+ }
+
+ /* Clean up Change options which have been confirmed already */
+ list_for_each_entry_safe(cur, next, fn_list, node)
+ if (!cur->needs_confirm)
+ dccp_feat_list_pop(cur);
+
+ dccp_pr_debug("Activation OK\n");
+ return 0;
+
+activation_failed:
+ /*
+ * We clean up everything that may have been allocated, since
+ * it is difficult to track at which stage negotiation failed.
+ * This is ok, since all deallocation functions below are robust
+ * against NULL arguments.
+ */
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ return -1;
+}
+
#ifdef CONFIG_IP_DCCP_DEBUG
const char *dccp_feat_typename(const u8 type)
{
@@ -620,8 +1214,6 @@ const char *dccp_feat_typename(const u8 type)
return NULL;
}
-EXPORT_SYMBOL_GPL(dccp_feat_typename);
-
const char *dccp_feat_name(const u8 feat)
{
static const char *feature_names[] = {
@@ -639,11 +1231,11 @@ const char *dccp_feat_name(const u8 feat)
if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
return feature_names[DCCPF_RESERVED];
+ if (feat == DCCPF_SEND_LEV_RATE)
+ return "Send Loss Event Rate";
if (feat >= DCCPF_MIN_CCID_SPECIFIC)
return "CCID-specific";
return feature_names[feat];
}
-
-EXPORT_SYMBOL_GPL(dccp_feat_name);
#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e272222c7ac..9b46e2a7866 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -3,17 +3,103 @@
/*
* net/dccp/feat.h
*
- * An implementation of the DCCP protocol
+ * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
+ * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
*/
-
#include <linux/types.h>
#include "dccp.h"
+/*
+ * Known limit values
+ */
+/* Ack Ratio takes 2-byte integer values (11.3) */
+#define DCCPF_ACK_RATIO_MAX 0xFFFF
+/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
+#define DCCPF_SEQ_WMIN 32
+#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull
+/* Maximum number of SP values that fit in a single (Confirm) option */
+#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2)
+
+enum dccp_feat_type {
+ FEAT_AT_RX = 1, /* located at RX side of half-connection */
+ FEAT_AT_TX = 2, /* located at TX side of half-connection */
+ FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */
+ FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */
+ FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */
+};
+
+enum dccp_feat_state {
+ FEAT_DEFAULT = 0, /* using default values from 6.4 */
+ FEAT_INITIALISING, /* feature is being initialised */
+ FEAT_CHANGING, /* Change sent but not confirmed yet */
+ FEAT_UNSTABLE, /* local modification in state CHANGING */
+ FEAT_STABLE /* both ends (think they) agree */
+};
+
+/**
+ * dccp_feat_val - Container for SP or NN feature values
+ * @nn: single NN value
+ * @sp.vec: single SP value plus optional preference list
+ * @sp.len: length of @sp.vec in bytes
+ */
+typedef union {
+ u64 nn;
+ struct {
+ u8 *vec;
+ u8 len;
+ } sp;
+} dccp_feat_val;
+
+/**
+ * struct dccp_feat_entry - Data structure to perform feature negotiation
+ * @val: feature's current value (SP features may have preference list)
+ * @state: feature's current state
+ * @feat_num: one of %dccp_feature_numbers
+ * @needs_mandatory: whether Mandatory options should be sent
+ * @needs_confirm: whether to send a Confirm instead of a Change
+ * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
+ * @is_local: feature location (1) or feature-remote (0)
+ * @node: list pointers, entries arranged in FIFO order
+ */
+struct dccp_feat_entry {
+ dccp_feat_val val;
+ enum dccp_feat_state state:8;
+ u8 feat_num;
+
+ bool needs_mandatory,
+ needs_confirm,
+ empty_confirm,
+ is_local;
+
+ struct list_head node;
+};
+
+static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
+{
+ if (entry->needs_confirm)
+ return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
+ return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
+}
+
+/**
+ * struct ccid_dependency - Track changes resulting from choosing a CCID
+ * @dependent_feat: one of %dccp_feature_numbers
+ * @is_local: local (1) or remote (0) @dependent_feat
+ * @is_mandatory: whether presence of @dependent_feat is mission-critical or not
+ * @val: corresponding default value for @dependent_feat (u8 is sufficient here)
+ */
+struct ccid_dependency {
+ u8 dependent_feat;
+ bool is_local:1,
+ is_mandatory:1;
+ u8 val;
+};
+
#ifdef CONFIG_IP_DCCP_DEBUG
extern const char *dccp_feat_typename(const u8 type);
extern const char *dccp_feat_name(const u8 feat);
@@ -27,14 +113,30 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
#define dccp_feat_debug(type, feat, val)
#endif /* CONFIG_IP_DCCP_DEBUG */
-extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
- u8 *val, u8 len, gfp_t gfp);
-extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len);
-extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
- u8 *val, u8 len);
-extern void dccp_feat_clean(struct dccp_minisock *dmsk);
-extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
-extern int dccp_feat_init(struct dccp_minisock *dmsk);
+extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len);
+extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
+extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
+ u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
+extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
+extern int dccp_feat_init(struct sock *sk);
+
+/*
+ * Encoding variable-length options and their maximum length.
+ *
+ * This affects NN options (SP options are all u8) and other variable-length
+ * options (see table 3 in RFC 4340). The limit is currently given by the
+ * Sequence Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option;
+ * all other options consume less than 6 bytes (timestamps are 4 bytes).
+ * When updating this constant (e.g. due to new internet drafts / RFCs), make
+ * sure that you also update all code which refers to it.
+ */
+#define DCCP_OPTVAL_MAXLEN 6
+
+extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
+extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+extern int dccp_insert_option_mandatory(struct sk_buff *skb);
+extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+ u8 *val, u8 len, bool repeat_first);
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 779d0ed9ae9..7648f316310 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- if (dccp_msk(sk)->dccpms_send_ack_vector)
+ if (dp->dccps_hc_rx_ackvec != NULL)
dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
@@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -421,20 +421,19 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
goto out_invalid_packet;
}
+ /*
+ * If option processing (Step 8) failed, return 1 here so that
+ * dccp_v4_do_rcv() sends a Reset. The Reset code depends on
+ * the option type and is set in dccp_parse_options().
+ */
if (dccp_parse_options(sk, NULL, skb))
- goto out_invalid_packet;
+ return 1;
/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
dp->dccps_options_received.dccpor_timestamp_echo));
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
- dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto out_invalid_packet; /* FIXME: change error code */
-
/* Stop the REQUEST timer */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
WARN_ON(sk->sk_send_head == NULL);
@@ -475,6 +474,15 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
*/
dccp_set_state(sk, DCCP_PARTOPEN);
+ /*
+ * If feature negotiation was successful, activate features now;
+ * an activation failure means that this host could not activate
+ * one or more features (e.g. insufficient memory), which would
+ * leave at least one feature in an undefined state.
+ */
+ if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
+ goto unable_to_proceed;
+
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
@@ -509,6 +517,16 @@ out_invalid_packet:
/* dccp_v4_do_rcv will send a reset */
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
return 1;
+
+unable_to_proceed:
+ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
+ /*
+ * We mark this socket as no longer usable, so that the loop in
+ * dccp_sendmsg() terminates and the application gets notified.
+ */
+ dccp_set_state(sk, DCCP_CLOSED);
+ sk->sk_err = ECOMM;
+ return 1;
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -590,8 +608,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
skb) < 0)
return 1;
-
- /* FIXME: do congestion control initialization */
goto discard;
}
if (dh->dccph_type == DCCP_PKT_RESET)
@@ -602,7 +618,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 1;
}
- if (sk->sk_state != DCCP_REQUESTING) {
+ if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
if (dccp_check_seqno(sk, skb))
goto discard;
@@ -615,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -667,8 +683,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 1;
case DCCP_REQUESTING:
- /* FIXME: do congestion control initialization */
-
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
if (queued >= 0)
return queued;
@@ -727,5 +741,3 @@ u32 dccp_sample_rtt(struct sock *sk, long delta)
return delta;
}
-
-EXPORT_SYMBOL_GPL(dccp_sample_rtt);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e3dfddab21c..d1dd95289b8 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -545,6 +545,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
kfree(inet_rsk(req)->opt);
}
@@ -595,7 +596,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
- dccp_reqsk_init(req, skb);
+ if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+ goto drop_and_free;
dreq = dccp_rsk(req);
if (dccp_parse_options(sk, dreq, skb))
@@ -792,12 +794,10 @@ static int dccp_v4_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh);
DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
- dccp_pr_debug("%8.8s "
- "src=%u.%u.%u.%u@%-5d "
- "dst=%u.%u.%u.%u@%-5d seq=%llu",
+ dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu",
dccp_packet_name(dh->dccph_type),
- NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
- NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
+ &iph->saddr, ntohs(dh->dccph_sport),
+ &iph->daddr, ntohs(dh->dccph_dport),
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
if (dccp_packet_without_ack(skb)) {
@@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d4ce1224e00..b963f35c65f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- err = xfrm_lookup(&dst, &fl, sk, 0);
+ err = xfrm_lookup(net, &dst, &fl, sk, 0);
if (err < 0) {
sk->sk_err_soft = -err;
goto out;
@@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- err = xfrm_lookup(&dst, &fl, sk, 0);
+ err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
if (err < 0)
goto done;
@@ -304,6 +304,7 @@ done:
static void dccp_v6_reqsk_destructor(struct request_sock *req)
{
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
if (inet6_rsk(req)->pktopts != NULL)
kfree_skb(inet6_rsk(req)->pktopts);
}
@@ -342,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) {
- if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
+ if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) {
ip6_xmit(ctl_sk, skb, &fl, NULL, 0);
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
@@ -426,7 +427,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
- dccp_reqsk_init(req, skb);
+ if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+ goto drop_and_free;
dreq = dccp_rsk(req);
if (dccp_parse_options(sk, dreq, skb))
@@ -567,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
goto out;
}
@@ -1002,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
+ err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
if (err < 0) {
if (err == -EREMOTE)
err = ip6_dst_blackhole(sk, &dst, &fl);
@@ -1138,6 +1140,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e6bf99e3e41..6821ae33dd3 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,11 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
- dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid;
- dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid;
- dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio;
- dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
- dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count;
}
void dccp_time_wait(struct sock *sk, int state, int timeo)
@@ -112,7 +107,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
- const struct dccp_request_sock *dreq = dccp_rsk(req);
+ struct dccp_request_sock *dreq = dccp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct dccp_sock *newdp = dccp_sk(newsk);
struct dccp_minisock *newdmsk = dccp_msk(newsk);
@@ -125,35 +120,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
- if (dccp_feat_clone(sk, newsk))
- goto out_free;
-
- if (newdmsk->dccpms_send_ack_vector) {
- newdp->dccps_hc_rx_ackvec =
- dccp_ackvec_alloc(GFP_ATOMIC);
- if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
- goto out_free;
- }
-
- newdp->dccps_hc_rx_ccid =
- ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
- newsk, GFP_ATOMIC);
- newdp->dccps_hc_tx_ccid =
- ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
- newsk, GFP_ATOMIC);
- if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
- newdp->dccps_hc_tx_ccid == NULL)) {
- dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
- ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
- ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
-out_free:
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- sk_free(newsk);
- return NULL;
- }
-
+ INIT_LIST_HEAD(&newdp->dccps_featneg);
/*
* Step 3: Process LISTEN state
*
@@ -184,6 +151,17 @@ out_free:
dccp_set_seqno(&newdp->dccps_awl,
max48(newdp->dccps_awl, newdp->dccps_iss));
+ /*
+ * Activate features after initialising the sequence numbers,
+ * since CCID initialisation may depend on GSS, ISR, ISS etc.
+ */
+ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ newsk->sk_destruct = NULL;
+ sk_free(newsk);
+ return NULL;
+ }
dccp_init_xmit_timers(newsk);
DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
@@ -304,7 +282,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
-void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+int dccp_reqsk_init(struct request_sock *req,
+ struct dccp_sock const *dp, struct sk_buff const *skb)
{
struct dccp_request_sock *dreq = dccp_rsk(req);
@@ -313,6 +292,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
inet_rsk(req)->acked = 0;
req->rcv_wnd = sysctl_dccp_feat_sequence_window;
dreq->dreq_timestamp_echo = 0;
+
+ /* inherit feature negotiation options from listening socket */
+ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
}
EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0809b63cb05..7b1165c21f5 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,20 +26,21 @@
int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
-int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+u64 dccp_decode_value_var(const u8 *bf, const u8 len)
{
- u32 value = 0;
+ u64 value = 0;
+ if (len >= DCCP_OPTVAL_MAXLEN)
+ value += ((u64)*bf++) << 40;
+ if (len > 4)
+ value += ((u64)*bf++) << 32;
if (len > 3)
- value += *bf++ << 24;
+ value += ((u64)*bf++) << 24;
if (len > 2)
- value += *bf++ << 16;
+ value += ((u64)*bf++) << 16;
if (len > 1)
- value += *bf++ << 8;
+ value += ((u64)*bf++) << 8;
if (len > 0)
value += *bf;
@@ -64,7 +65,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
(dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
- unsigned char *value;
+ unsigned char *uninitialized_var(value);
u32 elapsed_time;
__be32 opt_val;
int rc;
@@ -131,41 +132,21 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
(unsigned long long)opt_recv->dccpor_ndp);
break;
- case DCCPO_CHANGE_L:
- /* fall through */
- case DCCPO_CHANGE_R:
- if (pkt_type == DCCP_PKT_DATA)
+ case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
+ if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
break;
- if (len < 2)
+ if (len == 0) /* no feature number: *value and len - 1 are invalid */
goto out_invalid_option;
- rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
- len - 1);
- /*
- * When there is a change error, change_recv is
- * responsible for dealing with it. i.e. reply with an
- * empty confirm.
- * If the change was mandatory, then we need to die.
- */
- if (rc && mandatory)
- goto out_invalid_option;
- break;
- case DCCPO_CONFIRM_L:
- /* fall through */
- case DCCPO_CONFIRM_R:
- if (pkt_type == DCCP_PKT_DATA)
- break;
- if (len < 2) /* FIXME this disallows empty confirm */
- goto out_invalid_option;
- if (dccp_feat_confirm_recv(sk, opt, *value,
- value + 1, len - 1))
- goto out_invalid_option;
+ rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
+ *value, value + 1, len - 1);
+ if (rc)
+ goto out_featneg_failed;
break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
break;
-
- if (dccp_msk(sk)->dccpms_send_ack_vector &&
+ if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
@@ -289,8 +268,10 @@ out_nonsensical_length:
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
- DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
+ rc = DCCP_RESET_CODE_OPTION_ERROR;
+out_featneg_failed:
+ DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
+ DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
@@ -299,9 +280,12 @@ out_invalid_option:
EXPORT_SYMBOL_GPL(dccp_parse_options);
-static void dccp_encode_value_var(const u32 value, unsigned char *to,
- const unsigned int len)
+void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
{
+ if (len >= DCCP_OPTVAL_MAXLEN)
+ *to++ = (value & 0xFF0000000000ull) >> 40;
+ if (len > 4)
+ *to++ = (value & 0xFF00000000ull) >> 32;
if (len > 3)
*to++ = (value & 0xFF000000) >> 24;
if (len > 2)
@@ -461,23 +445,61 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
return 0;
}
-static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
- u8 *val, u8 len)
+/**
+ * dccp_insert_option_mandatory - Mandatory option (5.8.2)
+ * Note that since we are using skb_push, this function needs to be called
+ * _after_ inserting the option it is supposed to influence (stack order).
+ */
+int dccp_insert_option_mandatory(struct sk_buff *skb)
+{
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
+ return -1;
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len++;
+ *skb_push(skb, 1) = DCCPO_MANDATORY;
+ return 0;
+}
+
+/**
+ * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb
+ * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
+ * @feat: one out of %dccp_feature_numbers
+ * @val: NN value or SP array (preferred element first) to copy
+ * @len: true length of @val in bytes (excluding first element repetition)
+ * @repeat_first: whether to copy the first element of @val twice
+ * The last argument is used to construct Confirm options, where the preferred
+ * value and the preference list appear separately (RFC 4340, 6.3.1). Preference
+ * lists are kept such that the preferred entry is always first, so we only need
+ * to copy twice, and avoid the overhead of cloning into a bigger array.
+ */
+int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
+ u8 *val, u8 len, bool repeat_first)
{
- u8 *to;
+ u8 tot_len, *to;
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
- DCCP_WARN("packet too small for feature %d option!\n", feat);
+ /* take the `Feature' field and possible repetition into account */
+ if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
+ DCCP_WARN("length %u for feature %u too large\n", len, feat);
return -1;
}
- DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
+ if (unlikely(val == NULL || len == 0))
+ len = repeat_first = 0;
+ tot_len = 3 + repeat_first + len;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
+ DCCP_WARN("packet too small for feature %d option!\n", feat);
+ return -1;
+ }
+ DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
- to = skb_push(skb, len + 3);
+ to = skb_push(skb, tot_len);
*to++ = type;
- *to++ = len + 3;
+ *to++ = tot_len;
*to++ = feat;
+ if (repeat_first)
+ *to++ = *val;
if (len)
memcpy(to, val, len);
@@ -487,69 +509,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
return 0;
}
-static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
- struct dccp_opt_pend *opt, *next;
- int change = 0;
-
- /* confirm any options [NN opts] */
- list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
- dccp_insert_feat_opt(skb, opt->dccpop_type,
- opt->dccpop_feat, opt->dccpop_val,
- opt->dccpop_len);
- /* fear empty confirms */
- if (opt->dccpop_val)
- kfree(opt->dccpop_val);
- kfree(opt);
- }
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
-
- /* see which features we need to send */
- list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
- /* see if we need to send any confirm */
- if (opt->dccpop_sc) {
- dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
- opt->dccpop_feat,
- opt->dccpop_sc->dccpoc_val,
- opt->dccpop_sc->dccpoc_len);
-
- BUG_ON(!opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc->dccpoc_val);
- kfree(opt->dccpop_sc);
- opt->dccpop_sc = NULL;
- }
-
- /* any option not confirmed, re-send it */
- if (!opt->dccpop_conf) {
- dccp_insert_feat_opt(skb, opt->dccpop_type,
- opt->dccpop_feat, opt->dccpop_val,
- opt->dccpop_len);
- change++;
- }
- }
-
- /* Retransmit timer.
- * If this is the master listening sock, we don't set a timer on it. It
- * should be fine because if the dude doesn't receive our RESPONSE
- * [which will contain the CHANGE] he will send another REQUEST which
- * will "retrnasmit" the change.
- */
- if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
- dccp_pr_debug("reset feat negotiation timer %p\n", sk);
-
- /* XXX don't reset the timer on re-transmissions. I.e. reset it
- * only when sending new stuff i guess. Currently the timer
- * never backs off because on re-transmission it just resets it!
- */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
- }
-
- return 0;
-}
-
/* The length of all options needs to be a multiple of 4 (5.8) */
static void dccp_insert_option_padding(struct sk_buff *skb)
{
@@ -565,19 +524,31 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
- if (dmsk->dccpms_send_ndp_count &&
- dccp_insert_option_ndp(sk, skb))
+ if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
return -1;
- if (!dccp_packet_without_ack(skb)) {
- if (dmsk->dccpms_send_ack_vector &&
- dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
- dccp_insert_option_ackvec(sk, skb))
+ if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
+
+ /* Feature Negotiation */
+ if (dccp_feat_insert_opts(dp, NULL, skb))
return -1;
+
+ if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
+ /*
+ * Obtain RTT sample from Request/Response exchange.
+ * This is currently used in CCID 3 initialisation.
+ */
+ if (dccp_insert_option_timestamp(sk, skb))
+ return -1;
+
+ } else if (dp->dccps_hc_rx_ackvec != NULL &&
+ dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+ dccp_insert_option_ackvec(sk, skb)) {
+ return -1;
+ }
}
if (dp->dccps_hc_rx_insert_options) {
@@ -586,21 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
dp->dccps_hc_rx_insert_options = 0;
}
- /* Feature negotiation */
- /* Data packets can't do feat negotiation */
- if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
- DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
- dccp_insert_options_feat(sk, skb))
- return -1;
-
- /*
- * Obtain RTT sample from Request/Response exchange.
- * This is currently used in CCID 3 initialisation.
- */
- if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
- dccp_insert_option_timestamp(sk, skb))
- return -1;
-
if (dp->dccps_timestamp_echo != 0 &&
dccp_insert_option_timestamp_echo(dp, NULL, skb))
return -1;
@@ -613,6 +569,9 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
{
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
+ if (dccp_feat_insert_opts(NULL, dreq, skb))
+ return -1;
+
if (dreq->dreq_timestamp_echo != 0 &&
dccp_insert_option_timestamp_echo(NULL, dreq, skb))
return -1;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 809d803d500..22a618af489 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -175,7 +175,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
* make it a multiple of 4
*/
- cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+ cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4);
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
@@ -339,10 +339,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
- if (dccp_insert_options_rsk(dreq, skb)) {
- kfree_skb(skb);
- return NULL;
- }
+ /* Resolve feature dependencies resulting from choice of CCID */
+ if (dccp_feat_server_ccid_dependencies(dreq))
+ goto response_failed;
+
+ if (dccp_insert_options_rsk(dreq, skb))
+ goto response_failed;
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -363,6 +365,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
inet_rsk(req)->acked = 1;
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
+response_failed:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL_GPL(dccp_make_response);
@@ -469,6 +474,10 @@ int dccp_connect(struct sock *sk)
struct sk_buff *skb;
struct inet_connection_sock *icsk = inet_csk(sk);
+ /* do not connect if feature negotiation setup fails */
+ if (dccp_feat_finalise_settings(dccp_sk(sk)))
+ return -EPROTO;
+
dccp_connect_init(sk);
skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 81368a7f537..37731da4148 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -74,30 +74,27 @@ static void printl(const char *fmt, ...)
static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size)
{
- const struct dccp_minisock *dmsk = dccp_msk(sk);
const struct inet_sock *inet = inet_sk(sk);
- const struct ccid3_hc_tx_sock *hctx;
+ struct ccid3_hc_tx_sock *hctx = NULL;
- if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
+ if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
hctx = ccid3_hc_tx_sk(sk);
- else
- hctx = NULL;
if (port == 0 || ntohs(inet->dport) == port ||
ntohs(inet->sport) == port) {
if (hctx)
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+ printl("%pI4:%u %pI4:%u %d %d %d %d %u "
"%llu %llu %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+ &inet->saddr, ntohs(inet->sport),
+ &inet->daddr, ntohs(inet->dport), size,
hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
hctx->ccid3hctx_x_recv >> 6,
hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
else
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+ printl("%pI4:%u %pI4:%u %d\n",
+ &inet->saddr, ntohs(inet->sport),
+ &inet->daddr, ntohs(inet->dport), size);
}
jprobe_return();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d0bd3481976..945b4d5d23b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,16 +40,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
- .lhash_lock = RW_LOCK_UNLOCKED,
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
@@ -67,6 +61,9 @@ void dccp_set_state(struct sock *sk, const int state)
case DCCP_OPEN:
if (oldstate != DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+ /* Client retransmits all Confirm options until entering OPEN */
+ if (oldstate == DCCP_PARTOPEN)
+ dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
break;
case DCCP_CLOSED:
@@ -175,7 +172,6 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_minisock_init(&dp->dccps_minisock);
@@ -193,45 +189,10 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dccp_init_xmit_timers(sk);
- /*
- * FIXME: We're hardcoding the CCID, and doing this at this point makes
- * the listening (master) sock get CCID control blocks, which is not
- * necessary, but for now, to not mess with the test userspace apps,
- * lets leave it here, later the real solution is to do this in a
- * setsockopt(CCIDs-I-want/accept). -acme
- */
- if (likely(ctl_sock_initialized)) {
- int rc = dccp_feat_init(dmsk);
-
- if (rc)
- return rc;
-
- if (dmsk->dccpms_send_ack_vector) {
- dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
- if (dp->dccps_hc_rx_ackvec == NULL)
- return -ENOMEM;
- }
- dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
- sk, GFP_KERNEL);
- dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
- sk, GFP_KERNEL);
- if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
- dp->dccps_hc_tx_ccid == NULL)) {
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- if (dmsk->dccpms_send_ack_vector) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
- return -ENOMEM;
- }
- } else {
- /* control socket doesn't need feat nego */
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
- }
-
+ INIT_LIST_HEAD(&dp->dccps_featneg);
+ /* control socket doesn't need feat nego */
+ if (likely(ctl_sock_initialized))
+ return dccp_feat_init(sk);
return 0;
}
@@ -240,7 +201,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
/*
* DCCP doesn't use sk_write_queue, just sk_send_head
@@ -258,7 +218,7 @@ void dccp_destroy_sock(struct sock *sk)
kfree(dp->dccps_service_list);
dp->dccps_service_list = NULL;
- if (dmsk->dccpms_send_ack_vector) {
+ if (dp->dccps_hc_rx_ackvec != NULL) {
dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
dp->dccps_hc_rx_ackvec = NULL;
}
@@ -267,7 +227,7 @@ void dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
/* clean up feature negotiation state */
- dccp_feat_clean(dmsk);
+ dccp_feat_list_purge(&dp->dccps_featneg);
}
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
@@ -277,6 +237,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
+ /* do not start to listen if feature negotiation setup fails */
+ if (dccp_feat_finalise_settings(dp))
+ return -EPROTO;
return inet_csk_listen_start(sk, backlog);
}
@@ -466,42 +429,70 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
return 0;
}
-/* byte 1 is feature. the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
- struct dccp_so_feat __user *optval)
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
- struct dccp_so_feat opt;
- u8 *val;
- int rc;
+ u8 *list, len;
+ int i, rc;
- if (copy_from_user(&opt, optval, sizeof(opt)))
- return -EFAULT;
+ if (cscov < 0 || cscov > 15)
+ return -EINVAL;
/*
- * rfc4340: 6.1. Change Options
+ * Populate a list of permissible values, in the range cscov...15. This
+ * is necessary since feature negotiation of single values only works if
+ * both sides incidentally choose the same value. Since the list starts
+ * lowest-value first, negotiation will pick the smallest shared value.
*/
- if (opt.dccpsf_len < 1)
+ if (cscov == 0)
+ return 0;
+ len = 16 - cscov;
+
+ list = kmalloc(len, GFP_KERNEL);
+ if (list == NULL)
+ return -ENOBUFS;
+
+ for (i = 0; i < len; i++)
+ list[i] = cscov + i; /* leave cscov intact, it is stored below */
+
+ rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
+
+ if (rc == 0) {
+ if (rx)
+ dccp_sk(sk)->dccps_pcrlen = cscov;
+ else
+ dccp_sk(sk)->dccps_pcslen = cscov;
+ }
+ kfree(list);
+ return rc;
+}
+
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+ char __user *optval, int optlen)
+{
+ u8 *val;
+ int rc = 0;
+
+ if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
return -EINVAL;
- val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
- if (!val)
+ val = kmalloc(optlen, GFP_KERNEL);
+ if (val == NULL)
return -ENOMEM;
- if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
- rc = -EFAULT;
- goto out_free_val;
+ if (copy_from_user(val, optval, optlen)) {
+ kfree(val);
+ return -EFAULT;
}
- rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
- val, opt.dccpsf_len, GFP_KERNEL);
- if (rc)
- goto out_free_val;
+ lock_sock(sk);
+ if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
-out:
- return rc;
+ if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+ release_sock(sk);
-out_free_val:
kfree(val);
- goto out;
+ return rc;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -510,7 +501,21 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
struct dccp_sock *dp = dccp_sk(sk);
int val, err = 0;
- if (optlen < sizeof(int))
+ switch (optname) {
+ case DCCP_SOCKOPT_PACKET_SIZE:
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CHANGE_L:
+ case DCCP_SOCKOPT_CHANGE_R:
+ DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CCID:
+ case DCCP_SOCKOPT_RX_CCID:
+ case DCCP_SOCKOPT_TX_CCID:
+ return dccp_setsockopt_ccid(sk, optname, optval, optlen);
+ }
+
+ if (optlen < (int)sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
@@ -521,53 +526,24 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
lock_sock(sk);
switch (optname) {
- case DCCP_SOCKOPT_PACKET_SIZE:
- DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
- err = 0;
- break;
- case DCCP_SOCKOPT_CHANGE_L:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
- (struct dccp_so_feat __user *)
- optval);
- break;
- case DCCP_SOCKOPT_CHANGE_R:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
- (struct dccp_so_feat __user *)
- optval);
- break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
if (dp->dccps_role != DCCP_ROLE_SERVER)
err = -EOPNOTSUPP;
else
dp->dccps_server_timewait = (val != 0);
break;
- case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
- if (val < 0 || val > 15)
- err = -EINVAL;
- else
- dp->dccps_pcslen = val;
+ case DCCP_SOCKOPT_SEND_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, false);
break;
- case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
- if (val < 0 || val > 15)
- err = -EINVAL;
- else {
- dp->dccps_pcrlen = val;
- /* FIXME: add feature negotiation,
- * ChangeL(MinimumChecksumCoverage, val) */
- }
+ case DCCP_SOCKOPT_RECV_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, true);
break;
default:
err = -ENOPROTOOPT;
break;
}
-
release_sock(sk);
+
return err;
}
@@ -648,6 +624,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_GET_CUR_MPS:
val = dp->dccps_mss_cache;
break;
+ case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+ return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+ case DCCP_SOCKOPT_TX_CCID:
+ val = ccid_get_current_tx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
+ case DCCP_SOCKOPT_RX_CCID:
+ val = ccid_get_current_rx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
val = dp->dccps_server_timewait;
break;
@@ -976,7 +964,6 @@ adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- atomic_inc(sk->sk_prot->orphan_count);
/*
* It is the last release_sock in its life. It will remove backlog.
@@ -990,6 +977,8 @@ adjudge_to_death:
bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk));
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+
/* Have we already been destroyed by a softirq or backlog? */
if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
goto out;
@@ -1040,17 +1029,21 @@ static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
- int rc = -ENOBUFS;
+ int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
-
+ rc = percpu_counter_init(&dccp_orphan_count, 0);
+ if (rc)
+ goto out;
+ rc = -ENOBUFS;
+ inet_hashinfo_init(&dccp_hashinfo);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out;
+ goto out_free_percpu;
/*
* Size and allocate the main established and bind bucket
@@ -1084,8 +1077,8 @@ static int __init dccp_init(void)
}
for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
+ INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
+ INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
}
if (inet_ehash_locks_alloc(&dccp_hashinfo))
@@ -1125,9 +1118,15 @@ static int __init dccp_init(void)
if (rc)
goto out_ackvec_exit;
+ rc = ccid_initialize_builtins();
+ if (rc)
+ goto out_sysctl_exit;
+
dccp_timestamping_init();
out:
return rc;
+out_sysctl_exit:
+ dccp_sysctl_exit();
out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
@@ -1143,11 +1142,14 @@ out_free_dccp_ehash:
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+ percpu_counter_destroy(&dccp_orphan_count);
goto out;
}
static void __exit dccp_fini(void)
{
+ ccid_cleanup_builtins();
dccp_mib_exit();
free_pages((unsigned long)dccp_hashinfo.bhash,
get_order(dccp_hashinfo.bhash_size *
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 21295993fdb..018e210875e 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -41,27 +41,6 @@ static struct ctl_table dccp_default_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "ack_ratio",
- .data = &sysctl_dccp_feat_ack_ratio,
- .maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "send_ackvec",
- .data = &sysctl_dccp_feat_send_ack_vector,
- .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "send_ndp",
- .data = &sysctl_dccp_feat_send_ndp_count,
- .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "request_retries",
.data = &sysctl_dccp_request_retries,
.maxlen = sizeof(sysctl_dccp_request_retries),
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 54b3c7e9e01..162d1e683c3 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- /* retransmit timer is used for feature negotiation throughout
- * connection. In this case, no packet is re-transmitted, but rather an
- * ack is generated and pending changes are placed into its options.
- */
- if (sk->sk_send_head == NULL) {
- dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
- if (sk->sk_state == DCCP_OPEN)
- dccp_send_ack(sk);
- goto backoff;
- }
-
/*
* More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
@@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk)
return;
}
-backoff:
icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);