summaryrefslogtreecommitdiffstats
path: root/net/sctp
diff options
context:
space:
mode:
Diffstat (limited to 'net/sctp')
-rw-r--r--net/sctp/Kconfig12
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/associola.c13
-rw-r--r--net/sctp/chunk.c4
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/ipv6.c27
-rw-r--r--net/sctp/output.c27
-rw-r--r--net/sctp/outqueue.c94
-rw-r--r--net/sctp/probe.c214
-rw-r--r--net/sctp/protocol.c9
-rw-r--r--net/sctp/sm_make_chunk.c24
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/socket.c39
-rw-r--r--net/sctp/transport.c61
14 files changed, 355 insertions, 182 deletions
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 58b3e882a18..126b014eb79 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,6 +37,18 @@ menuconfig IP_SCTP
if IP_SCTP
+config NET_SCTPPROBE
+ tristate "SCTP: Association probing"
+ depends on PROC_FS && KPROBES
+ ---help---
+ This module allows for capturing the changes to SCTP association
+ state in response to incoming packets. It is used for debugging
+ SCTP congestion control algorithms. If you don't understand
+ what was just said, you don't need it: say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called sctp_probe.
+
config SCTP_DBG_MSG
bool "SCTP: Debug messages"
help
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6b794734380..5c30b7a873d 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_IP_SCTP) += sctp.o
+obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
protocol.o endpointola.o associola.o \
@@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
+sctp_probe-y := probe.o
+
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 99c93ee98ad..3912420cedc 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Retrieve the SCTP per socket area. */
sp = sctp_sk((struct sock *)sk);
- /* Init all variables to a known value. */
- memset(asoc, 0, sizeof(struct sctp_association));
-
/* Discarding const is appropriate here. */
asoc->ep = (struct sctp_endpoint *)ep;
sctp_endpoint_hold(asoc->ep);
@@ -762,7 +759,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
asoc->peer.retran_path = peer;
}
- if (asoc->peer.active_path == asoc->peer.retran_path) {
+ if (asoc->peer.active_path == asoc->peer.retran_path &&
+ peer->state != SCTP_UNCONFIRMED) {
asoc->peer.retran_path = peer;
}
@@ -1320,12 +1318,13 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
/* Keep track of the next transport in case
* we don't find any active transport.
*/
- if (!next)
+ if (t->state != SCTP_UNCONFIRMED && !next)
next = t;
}
}
- asoc->peer.retran_path = t;
+ if (t)
+ asoc->peer.retran_path = t;
SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
" %p addr: ",
@@ -1485,7 +1484,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
if (asoc->rwnd >= len) {
asoc->rwnd -= len;
if (over) {
- asoc->rwnd_press = asoc->rwnd;
+ asoc->rwnd_press += asoc->rwnd;
asoc->rwnd = 0;
}
} else {
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db59a3..476caaf100e 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
msg->send_failed = 0;
msg->send_error = 0;
msg->can_abandon = 0;
+ msg->can_delay = 1;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
- msg->msg_size = 0;
}
/* Allocate and initialize datamsg. */
@@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
{
sctp_datamsg_hold(msg);
chunk->msg = msg;
- msg->msg_size += chunk->skb->len;
}
@@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (msg_len >= first_len) {
msg_len -= first_len;
whole = 1;
+ msg->can_delay = 0;
}
/* How many full sized? How many bytes leftover? */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7ec09ba03a1..e10acc01c75 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct sctp_shared_key *null_key;
int err;
- memset(ep, 0, sizeof(struct sctp_endpoint));
-
ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
if (!ep->digest)
return NULL;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9fb5d37c37a..732689140fb 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
- return ip6_xmit(sk, skb, &fl, np->opt, 0);
+ return ip6_xmit(sk, skb, &fl, np->opt);
}
/* Returns the dst cache entry for the given source and destination ip
@@ -277,20 +277,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
union sctp_addr *s2)
{
- struct in6_addr *a1 = &s1->v6.sin6_addr;
- struct in6_addr *a2 = &s2->v6.sin6_addr;
- int i, j;
-
- for (i = 0; i < 4 ; i++) {
- __be32 a1xora2;
-
- a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
-
- if ((j = fls(ntohl(a1xora2))))
- return (i * 32 + 32 - j);
- }
-
- return (i*32);
+ return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr);
}
/* Fills in the source address(saddr) based on the destination address(daddr)
@@ -372,13 +359,13 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
}
read_lock_bh(&in6_dev->lock);
- for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) {
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
/* Add the address to the local list. */
addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
if (addr) {
addr->a.v6.sin6_family = AF_INET6;
addr->a.v6.sin6_port = 0;
- addr->a.v6.sin6_addr = ifp->addr;
+ ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
addr->a.v6.sin6_scope_id = dev->ifindex;
addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
@@ -419,7 +406,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
{
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = 0;
- addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+ ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -432,7 +419,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
addr->v4.sin_addr.s_addr;
} else {
- inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+ ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
}
}
@@ -445,7 +432,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
} else {
- inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+ ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
}
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fad261d41ec..a646681f5ac 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet)
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (sctp_chunk_is_data(chunk)) {
+ /* 6.3.1 C4) When data is in flight and when allowed
+ * by rule C5, a new RTT measurement MUST be made each
+ * round trip. Furthermore, new RTT measurements
+ * SHOULD be made no more than once per round-trip
+ * for a given destination transport address.
+ */
- if (!chunk->resent) {
-
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
-
- if (!tp->rto_pending) {
- chunk->rtt_in_progress = 1;
- tp->rto_pending = 1;
- }
+ if (!tp->rto_pending) {
+ chunk->rtt_in_progress = 1;
+ tp->rto_pending = 1;
}
-
- chunk->resent = 1;
-
has_data = 1;
}
@@ -681,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
* Don't delay large message writes that may have been
* fragmeneted into small peices.
*/
- if ((len < max) && (chunk->msg->msg_size < max)) {
+ if ((len < max) && chunk->msg->can_delay) {
retval = SCTP_XMIT_NAGLE_DELAY;
goto finish;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index abfc0b8dee7..5d057178ce0 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
struct sctp_sackhdr *sack,
- __u32 highest_new_tsn);
+ __u32 *highest_new_tsn);
static void sctp_mark_missing(struct sctp_outq *q,
struct list_head *transmitted_queue,
@@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
/* If it is data, queue it up, otherwise, send it
* immediately.
*/
- if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
+ if (sctp_chunk_is_data(chunk)) {
/* Is it OK to queue data chunks? */
/* From 9. Termination of Association
*
@@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
if (fast_rtx && !chunk->fast_retransmit)
continue;
+redo:
/* Attempt to append this chunk to the packet. */
status = sctp_packet_append_chunk(pkt, chunk);
switch (status) {
case SCTP_XMIT_PMTU_FULL:
+ if (!pkt->has_data && !pkt->has_cookie_echo) {
+ /* If this packet did not contain DATA then
+ * retransmission did not happen, so do it
+ * again. We'll ignore the error here since
+ * control chunks are already freed so there
+ * is nothing we can do.
+ */
+ sctp_packet_transmit(pkt);
+ goto redo;
+ }
+
/* Send this packet. */
error = sctp_packet_transmit(pkt);
@@ -647,14 +659,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
if (chunk->fast_retransmit == SCTP_NEED_FRTX)
chunk->fast_retransmit = SCTP_DONT_FRTX;
- /* Force start T3-rtx timer when fast retransmitting
- * the earliest outstanding TSN
- */
- if (!timer && fast_rtx &&
- ntohl(chunk->subh.data_hdr->tsn) ==
- asoc->ctsn_ack_point + 1)
- timer = 2;
-
q->empty = 0;
break;
}
@@ -854,6 +858,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
if (status != SCTP_XMIT_OK) {
/* put the chunk back */
list_add(&chunk->list, &q->control_chunk_list);
+ } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+ /* PR-SCTP C5) If a FORWARD TSN is sent, the
+ * sender MUST assure that at least one T3-rtx
+ * timer is running.
+ */
+ sctp_transport_reset_timers(transport);
}
break;
@@ -906,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
rtx_timeout, &start_timer);
if (start_timer)
- sctp_transport_reset_timers(transport,
- start_timer-1);
+ sctp_transport_reset_timers(transport);
/* This can happen on COOKIE-ECHO resend. Only
* one chunk can get bundled with a COOKIE-ECHO.
@@ -1040,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
list_add_tail(&chunk->transmitted_list,
&transport->transmitted);
- sctp_transport_reset_timers(transport, 0);
+ sctp_transport_reset_timers(transport);
q->empty = 0;
@@ -1100,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
assoc->unack_data = unack_data;
}
-/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
-static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
- struct sctp_association *asoc)
-{
- struct sctp_transport *transport;
- struct sctp_chunk *chunk;
- __u32 highest_new_tsn, tsn;
- struct list_head *transport_list = &asoc->peer.transport_addr_list;
-
- highest_new_tsn = ntohl(sack->cum_tsn_ack);
-
- list_for_each_entry(transport, transport_list, transports) {
- list_for_each_entry(chunk, &transport->transmitted,
- transmitted_list) {
- tsn = ntohl(chunk->subh.data_hdr->tsn);
-
- if (!chunk->tsn_gap_acked &&
- TSN_lt(highest_new_tsn, tsn) &&
- sctp_acked(sack, tsn))
- highest_new_tsn = tsn;
- }
- }
-
- return highest_new_tsn;
-}
-
/* This is where we REALLY process a SACK.
*
* Process the SACK against the outqueue. Mostly, this just frees
@@ -1145,6 +1128,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
struct sctp_transport *primary = asoc->peer.primary_path;
int count_of_newacks = 0;
int gap_ack_blocks;
+ u8 accum_moved = 0;
/* Grab the association's destination address list. */
transport_list = &asoc->peer.transport_addr_list;
@@ -1193,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
if (gap_ack_blocks)
highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
- if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
- highest_new_tsn = highest_tsn;
+ if (TSN_lt(asoc->highest_sacked, highest_tsn))
asoc->highest_sacked = highest_tsn;
- } else {
- highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
- }
+ highest_new_tsn = sack_ctsn;
/* Run through the retransmit queue. Credit bytes received
* and free those chunks that we can.
*/
- sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
+ sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
/* Run through the transmitted queue.
* Credit bytes received and free those chunks which we can.
@@ -1213,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
*/
list_for_each_entry(transport, transport_list, transports) {
sctp_check_transmitted(q, &transport->transmitted,
- transport, sack, highest_new_tsn);
+ transport, sack, &highest_new_tsn);
/*
* SFR-CACC algorithm:
* C) Let count_of_newacks be the number of
@@ -1223,16 +1204,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
count_of_newacks ++;
}
+ /* Move the Cumulative TSN Ack Point if appropriate. */
+ if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
+ asoc->ctsn_ack_point = sack_ctsn;
+ accum_moved = 1;
+ }
+
if (gap_ack_blocks) {
+
+ if (asoc->fast_recovery && accum_moved)
+ highest_new_tsn = highest_tsn;
+
list_for_each_entry(transport, transport_list, transports)
sctp_mark_missing(q, &transport->transmitted, transport,
highest_new_tsn, count_of_newacks);
}
- /* Move the Cumulative TSN Ack Point if appropriate. */
- if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
- asoc->ctsn_ack_point = sack_ctsn;
-
/* Update unack_data field in the assoc. */
sctp_sack_update_unack_data(asoc, sack);
@@ -1315,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
struct sctp_sackhdr *sack,
- __u32 highest_new_tsn_in_sack)
+ __u32 *highest_new_tsn_in_sack)
{
struct list_head *lchunk;
struct sctp_chunk *tchunk;
@@ -1387,7 +1374,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* instance).
*/
if (!tchunk->tsn_gap_acked &&
- !tchunk->resent &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
@@ -1404,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
*/
if (!tchunk->tsn_gap_acked) {
tchunk->tsn_gap_acked = 1;
+ *highest_new_tsn_in_sack = tsn;
bytes_acked += sctp_data_size(tchunk);
if (!tchunk->transport)
migrate_bytes += sctp_data_size(tchunk);
@@ -1677,7 +1664,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
struct sctp_chunk *chunk;
__u32 tsn;
char do_fast_retransmit = 0;
- struct sctp_transport *primary = q->asoc->peer.primary_path;
+ struct sctp_association *asoc = q->asoc;
+ struct sctp_transport *primary = asoc->peer.primary_path;
list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
new file mode 100644
index 00000000000..db3a42b8b34
--- /dev/null
+++ b/net/sctp/probe.c
@@ -0,0 +1,214 @@
+/*
+ * sctp_probe - Observe the SCTP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for SCTP from Stephen Hemminger's code
+ * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
+MODULE_DESCRIPTION("SCTP snooper");
+MODULE_LICENSE("GPL");
+
+static int port __read_mostly = 0; /* port filter tested in jsctp_sf_eat_sack(); 0 disables the filter */
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+static int bufsize __read_mostly = 64 * 1024; /* size passed to kfifo_alloc() in sctpprobe_init() */
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+static int full __read_mostly = 1; /* 0: log only when the primary path's cwnd differs from the last logged value */
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
+module_param(full, int, 0);
+
+static const char procname[] = "sctpprobe"; /* name of the proc entry created for init_net */
+
+static struct { /* state shared by the probe handler and the proc file handlers */
+ struct kfifo fifo; /* log text: filled by printl(), drained by sctpprobe_read() */
+ spinlock_t lock; /* passed to kfifo_in_locked() and kfifo_out_locked() */
+ wait_queue_head_t wait; /* sctpprobe_read() sleeps here; printl() wakes it */
+ struct timespec tstart; /* set in sctpprobe_open(); log timestamps are relative to it */
+} sctpw;
+
+/*
+ * Format one fragment of log text and append it to the probe fifo.
+ *
+ * The text is rendered into a 256-byte local buffer, pushed into sctpw.fifo
+ * under sctpw.lock, and any task sleeping on sctpw.wait is then woken.  The
+ * return value of kfifo_in_locked() is not checked, so nothing is reported to
+ * the caller if the fifo accepts fewer bytes than were formatted.
+ */
+static void printl(const char *fmt, ...)
+{
+	char line[256];
+	va_list ap;
+	int n;
+
+	va_start(ap, fmt);
+	n = vscnprintf(line, sizeof(line), fmt, ap);
+	va_end(ap);
+
+	kfifo_in_locked(&sctpw.fifo, line, n, &sctpw.lock);
+	wake_up(&sctpw.wait);
+}
+
+/*
+ * Open handler for the proc entry: start a fresh capture.
+ *
+ * Empties the log fifo and stores the current time in sctpw.tstart, which
+ * jsctp_sf_eat_sack() subtracts from each sample's timestamp.
+ *
+ * The reset is done while holding sctpw.lock, the same lock printl() and
+ * sctpprobe_read() hand to kfifo_in_locked()/kfifo_out_locked(), so it cannot
+ * run in the middle of a concurrent insert or removal.
+ *
+ * Always returns 0.
+ */
+static int sctpprobe_open(struct inode *inode, struct file *file)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sctpw.lock, flags);
+	kfifo_reset(&sctpw.fifo);
+	spin_unlock_irqrestore(&sctpw.lock, flags);
+
+	getnstimeofday(&sctpw.tstart);
+
+	return 0;
+}
+
+/*
+ * Read handler for the proc entry: copy queued log text to user space.
+ *
+ * Sleeps (interruptibly) until the fifo holds at least one byte, then moves
+ * up to @len bytes through a temporary kernel buffer into @buf.  @ppos is not
+ * used.
+ *
+ * The request is clamped to the fifo's capacity before the temporary buffer
+ * is allocated: one call can never return more than the fifo holds, so a
+ * larger vmalloc() would only let the caller force an arbitrarily large
+ * allocation.
+ *
+ * Returns the number of bytes copied, 0 when @len is 0, -EINVAL when @buf is
+ * NULL, -ENOMEM when the temporary buffer cannot be allocated, the non-zero
+ * result of wait_event_interruptible() if the wait ends early, or -EFAULT
+ * when copy_to_user() fails.
+ */
+static ssize_t sctpprobe_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+	unsigned char *tbuf;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (len == 0)
+		return 0;
+
+	/* Never allocate more than a single read could possibly return. */
+	len = min_t(size_t, len, kfifo_size(&sctpw.fifo));
+
+	tbuf = vmalloc(len);
+	if (!tbuf)
+		return -ENOMEM;
+
+	error = wait_event_interruptible(sctpw.wait,
+					 kfifo_len(&sctpw.fifo) != 0);
+	if (error)
+		goto out_free;
+
+	cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
+
+out_free:
+	vfree(tbuf);
+
+	return error ? error : cnt;
+}
+
+static const struct file_operations sctpprobe_fops = { /* file operations for the proc entry created in sctpprobe_init() */
+ .owner = THIS_MODULE,
+ .open = sctpprobe_open, /* empties the fifo and restarts the time base */
+ .read = sctpprobe_read, /* waits for log data, then copies it to the caller */
+};
+
+/*
+ * jprobe entry handler installed through sctp_recv_probe on
+ * sctp_sf_eat_sack_6_2.
+ *
+ * When the call passes the "full"/cwnd-change test and the port filter, one
+ * log line is emitted through printl():
+ *   - time elapsed since sctpw.tstart, as seconds.microseconds;
+ *   - association pointer, bound port, peer port, association pathmtu,
+ *     peer rwnd and unack_data;
+ *   - for every transport on the peer list: its address (prefixed with '*'
+ *     for the primary path), state, cwnd, ssthresh, flight_size,
+ *     partial_bytes_acked and pathmtu.
+ *
+ * lcwnd remembers the primary path's cwnd at the last logged call; it is a
+ * single static shared by every association seen by the probe.
+ *
+ * Every path ends in jprobe_return().  NOTE(review): the trailing return is
+ * presumably never reached and only satisfies the function's return type --
+ * confirm against the kprobes documentation.
+ */
+sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	static __u32 lcwnd = 0;
+	struct sctp_transport *primary = asoc->peer.primary_path;
+	struct sctp_transport *t;
+	struct timespec elapsed;
+
+	/* cwnd-only mode: nothing to log while the primary cwnd is unchanged. */
+	if (!full && primary->cwnd == lcwnd)
+		goto out;
+
+	/* Port filter set and neither the peer nor the bound port matches. */
+	if (port && asoc->peer.port != port &&
+	    ep->base.bind_addr.port != port)
+		goto out;
+
+	lcwnd = primary->cwnd;
+
+	getnstimeofday(&elapsed);
+	elapsed = timespec_sub(elapsed, sctpw.tstart);
+
+	printl("%lu.%06lu ", (unsigned long) elapsed.tv_sec,
+	       (unsigned long) elapsed.tv_nsec / NSEC_PER_USEC);
+
+	printl("%p %5d %5d %5d %8d %5d ", asoc,
+	       ep->base.bind_addr.port, asoc->peer.port,
+	       asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
+
+	list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
+		/* Flag the primary path so it stands out in the log line. */
+		if (t == asoc->peer.primary_path)
+			printl("*");
+
+		if (t->ipaddr.sa.sa_family == AF_INET)
+			printl("%pI4 ", &t->ipaddr.v4.sin_addr);
+		else
+			printl("%pI6 ", &t->ipaddr.v6.sin6_addr);
+
+		printl("%2u %8u %8u %8u %8u %8u ",
+		       t->state, t->cwnd, t->ssthresh,
+		       t->flight_size, t->partial_bytes_acked,
+		       t->pathmtu);
+	}
+	printl("\n");
+
+out:
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe sctp_recv_probe = { /* registered in sctpprobe_init(), unregistered in sctpprobe_exit() */
+ .kp = {
+ .symbol_name = "sctp_sf_eat_sack_6_2", /* kernel symbol the probe is placed on */
+ },
+ .entry = jsctp_sf_eat_sack, /* handler that writes the log line */
+};
+
+/*
+ * Module load: set up the shared state, then publish the probe.
+ *
+ * Steps, in order: initialise the wait queue and lock, allocate the log fifo
+ * with the "bufsize" module parameter, create the proc entry for init_net
+ * (readable by its owner only), and register the jprobe.  A failure at any
+ * step undoes the steps already completed.
+ *
+ * Returns 0 on success, -ENOMEM if the fifo or the proc entry cannot be
+ * created, or the error returned by register_jprobe().
+ */
+static __init int sctpprobe_init(void)
+{
+	int ret;
+
+	init_waitqueue_head(&sctpw.wait);
+	spin_lock_init(&sctpw.lock);
+
+	if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL) != 0)
+		return -ENOMEM;
+
+	if (proc_net_fops_create(&init_net, procname, S_IRUSR,
+				 &sctpprobe_fops) == NULL) {
+		ret = -ENOMEM;
+		goto free_kfifo;
+	}
+
+	ret = register_jprobe(&sctp_recv_probe);
+	if (ret != 0)
+		goto remove_proc;
+
+	pr_info("SCTP probe registered (port=%d)\n", port);
+
+	return 0;
+
+remove_proc:
+	proc_net_remove(&init_net, procname);
+free_kfifo:
+	kfifo_free(&sctpw.fifo);
+	return ret;
+}
+
+/*
+ * Module unload: tear down in the reverse order of sctpprobe_init().
+ *
+ * The jprobe is unregistered first so jsctp_sf_eat_sack() can no longer call
+ * printl() on the fifo; the proc entry is removed next so sctpprobe_open()
+ * and sctpprobe_read() can no longer be entered through it; only then is the
+ * fifo memory released.  Freeing the fifo before its producer and consumer
+ * are gone would let them touch freed memory.
+ */
+static __exit void sctpprobe_exit(void)
+{
+	unregister_jprobe(&sctp_recv_probe);
+	proc_net_remove(&init_net, procname);
+	kfifo_free(&sctpw.fifo);
+}
+
+module_init(sctpprobe_init); /* load-time entry point */
+module_exit(sctpprobe_exit); /* unload-time entry point */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a56f98e82f9..182749867c7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
memset(&fl, 0x0, sizeof(struct flowi));
fl.fl4_dst = daddr->v4.sin_addr.s_addr;
+ fl.fl_ip_dport = daddr->v4.sin_port;
fl.proto = IPPROTO_SCTP;
if (asoc) {
fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
fl.oif = asoc->base.sk->sk_bound_dev_if;
+ fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
}
- if (saddr)
+ if (saddr) {
fl.fl4_src = saddr->v4.sin_addr.s_addr;
+ fl.fl_ip_sport = saddr->v4.sin_port;
+ }
SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
__func__, &fl.fl4_dst, &fl.fl4_src);
@@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
if ((laddr->state == SCTP_ADDR_SRC) &&
(AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+ fl.fl_ip_sport = laddr->a.v4.sin_port;
if (!ip_route_output_key(&init_net, &rt, &fl)) {
dst = &rt->u.dst;
goto out_unlock;
@@ -854,7 +859,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
- return ip_queue_xmit(skb, 0);
+ return ip_queue_xmit(skb);
}
static struct sctp_af sctp_af_inet;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 30c1767186b..d8261f3d771 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -445,10 +445,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
if (!retval)
goto nomem_chunk;
- /* Per the advice in RFC 2960 6.4, send this reply to
- * the source of the INIT packet.
+ /* RFC 2960 6.4 Multi-homed SCTP Endpoints
+ *
+ * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+ * HEARTBEAT ACK, etc.) to the same destination transport
+ * address from which it received the DATA or control chunk
+ * to which it is replying.
+ *
+ * [INIT ACK back to where the INIT came from.]
*/
retval->transport = chunk->transport;
+
retval->subh.init_hdr =
sctp_addto_chunk(retval, sizeof(initack), &initack);
retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
@@ -487,18 +494,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
/* We need to remove the const qualifier at this point. */
retval->asoc = (struct sctp_association *) asoc;
- /* RFC 2960 6.4 Multi-homed SCTP Endpoints
- *
- * An endpoint SHOULD transmit reply chunks (e.g., SACK,
- * HEARTBEAT ACK, * etc.) to the same destination transport
- * address from which it received the DATA or control chunk
- * to which it is replying.
- *
- * [INIT ACK back to where the INIT came from.]
- */
- if (chunk)
- retval->transport = chunk->transport;
-
nomem_chunk:
kfree(cookie);
nomem_cookie:
@@ -1254,7 +1249,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
INIT_LIST_HEAD(&retval->list);
retval->skb = skb;
retval->asoc = (struct sctp_association *)asoc;
- retval->resent = 0;
retval->has_tsn = 0;
retval->has_ssn = 0;
retval->rtt_in_progress = 0;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index eb1f42f45fd..22e67020044 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -732,11 +732,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
{
struct sctp_transport *t;
- t = sctp_assoc_choose_alter_transport(asoc,
+ if (chunk->transport)
+ t = chunk->transport;
+ else {
+ t = sctp_assoc_choose_alter_transport(asoc,
asoc->shutdown_last_sent_to);
+ chunk->transport = t;
+ }
asoc->shutdown_last_sent_to = t;
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
- chunk->transport = t;
}
/* Helper function to change the state of an association. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 44a1ab03a3f..ba1add0b13c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,9 +3720,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
- /* Set socket backlog limit. */
- sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
local_bh_disable();
percpu_counter_inc(&sctp_sockets_allocated);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -4387,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
transports) {
memcpy(&temp, &from->ipaddr, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
- addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
+ addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if (space_left < addrlen)
return -ENOMEM;
if (copy_to_user(to, &temp, addrlen))
@@ -5482,7 +5479,6 @@ pp_found:
*/
int reuse = sk->sk_reuse;
struct sock *sk2;
- struct hlist_node *node;
SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
if (pp->fastreuse && sk->sk_reuse &&
@@ -5703,7 +5699,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sctp_sock *sp = sctp_sk(sk);
unsigned int mask;
- poll_wait(file, sk->sk_sleep, wait);
+ poll_wait(file, sk_sleep(sk), wait);
/* A TCP-style listening socket becomes readable when the accept queue
* is not empty.
@@ -5944,7 +5940,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
int error;
DEFINE_WAIT(wait);
- prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
/* Socket errors? */
error = sock_error(sk);
@@ -5981,14 +5977,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
sctp_lock_sock(sk);
ready:
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return 0;
interrupted:
error = sock_intr_errno(*timeo_p);
out:
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
*err = error;
return error;
}
@@ -6062,14 +6058,14 @@ static void __sctp_write_space(struct sctp_association *asoc)
wake_up_interruptible(&asoc->wait);
if (sctp_writeable(sk)) {
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+ wake_up_interruptible(sk_sleep(sk));
/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
* We have not tested with it yet.
*/
- if (sock->fasync_list &&
+ if (sock->wq->fasync_list &&
!(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock,
SOCK_WAKE_SPACE, POLL_OUT);
@@ -6191,12 +6187,15 @@ do_nonblock:
void sctp_data_ready(struct sock *sk, int len)
{
- read_lock_bh(&sk->sk_callback_lock);
- if (sk_has_sleeper(sk))
- wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
- read_unlock_bh(&sk->sk_callback_lock);
+ rcu_read_unlock();
}
/* If socket sndbuf has changed, wake up all per association waiters. */
@@ -6307,7 +6306,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
for (;;) {
- prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+ prepare_to_wait_exclusive(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
if (list_empty(&ep->asocs)) {
@@ -6333,7 +6332,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
break;
}
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return err;
}
@@ -6343,7 +6342,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
DEFINE_WAIT(wait);
do {
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (list_empty(&sctp_sk(sk)->ep->asocs))
break;
sctp_release_sock(sk);
@@ -6351,7 +6350,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
sctp_lock_sock(sk);
} while (!signal_pending(current) && timeout);
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
}
static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4a368038d46..d67501f92ca 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
/* Copy in the address. */
peer->ipaddr = *addr;
peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
- peer->asoc = NULL;
-
- peer->dst = NULL;
memset(&peer->saddr, 0, sizeof(union sctp_addr));
/* From 6.3.1 RTO Calculation:
@@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
* parameter 'RTO.Initial'.
*/
peer->rto = msecs_to_jiffies(sctp_rto_initial);
- peer->rtt = 0;
- peer->rttvar = 0;
- peer->srtt = 0;
- peer->rto_pending = 0;
- peer->hb_sent = 0;
- peer->fast_recovery = 0;
peer->last_time_heard = jiffies;
peer->last_time_ecne_reduced = jiffies;
- peer->init_sent_count = 0;
-
peer->param_flags = SPP_HB_DISABLE |
SPP_PMTUD_ENABLE |
SPP_SACKDELAY_ENABLE;
- peer->hbinterval = 0;
/* Initialize the default path max_retrans. */
peer->pathmaxrxt = sctp_max_retrans_path;
- peer->error_count = 0;
INIT_LIST_HEAD(&peer->transmitted);
INIT_LIST_HEAD(&peer->send_ready);
INIT_LIST_HEAD(&peer->transports);
- peer->T3_rtx_timer.expires = 0;
- peer->hb_timer.expires = 0;
-
setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
(unsigned long)peer);
setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
@@ -115,15 +99,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
atomic_set(&peer->refcnt, 1);
- peer->dead = 0;
-
- peer->malloced = 0;
-
- /* Initialize the state information for SFR-CACC */
- peer->cacc.changeover_active = 0;
- peer->cacc.cycling_changeover = 0;
- peer->cacc.next_tsn_at_change = 0;
- peer->cacc.cacc_saw_newack = 0;
return peer;
}
@@ -197,7 +172,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
/* Start T3_rtx timer if it is not already running and update the heartbeat
* timer. This routine is called every time a DATA chunk is sent.
*/
-void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
+void sctp_transport_reset_timers(struct sctp_transport *transport)
{
/* RFC 2960 6.3.2 Retransmission Timer Rules
*
@@ -207,7 +182,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
* address.
*/
- if (force || !timer_pending(&transport->T3_rtx_timer))
+ if (!timer_pending(&transport->T3_rtx_timer))
if (!mod_timer(&transport->T3_rtx_timer,
jiffies + transport->rto))
sctp_transport_hold(transport);
@@ -405,15 +380,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
void sctp_transport_raise_cwnd(struct sctp_transport *transport,
__u32 sack_ctsn, __u32 bytes_acked)
{
+ struct sctp_association *asoc = transport->asoc;
__u32 cwnd, ssthresh, flight_size, pba, pmtu;
cwnd = transport->cwnd;
flight_size = transport->flight_size;
/* See if we need to exit Fast Recovery first */
- if (transport->fast_recovery &&
- TSN_lte(transport->fast_recovery_exit, sack_ctsn))
- transport->fast_recovery = 0;
+ if (asoc->fast_recovery &&
+ TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
+ asoc->fast_recovery = 0;
/* The appropriate cwnd increase algorithm is performed if, and only
* if the cumulative TSN whould advanced and the congestion window is
@@ -442,7 +418,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
* 2) the destination's path MTU. This upper bound protects
* against the ACK-Splitting attack outlined in [SAVAGE99].
*/
- if (transport->fast_recovery)
+ if (asoc->fast_recovery)
return;
if (bytes_acked > pmtu)
@@ -493,6 +469,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
sctp_lower_cwnd_t reason)
{
+ struct sctp_association *asoc = transport->asoc;
+
switch (reason) {
case SCTP_LOWER_CWND_T3_RTX:
/* RFC 2960 Section 7.2.3, sctpimpguide
@@ -503,11 +481,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* partial_bytes_acked = 0
*/
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
- transport->cwnd = transport->asoc->pathmtu;
+ 4*asoc->pathmtu);
+ transport->cwnd = asoc->pathmtu;
- /* T3-rtx also clears fast recovery on the transport */
- transport->fast_recovery = 0;
+ /* T3-rtx also clears fast recovery */
+ asoc->fast_recovery = 0;
break;
case SCTP_LOWER_CWND_FAST_RTX:
@@ -523,15 +501,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* cwnd = ssthresh
* partial_bytes_acked = 0
*/
- if (transport->fast_recovery)
+ if (asoc->fast_recovery)
return;
/* Mark Fast recovery */
- transport->fast_recovery = 1;
- transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+ asoc->fast_recovery = 1;
+ asoc->fast_recovery_exit = asoc->next_tsn - 1;
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
transport->cwnd = transport->ssthresh;
break;
@@ -551,7 +529,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
if (time_after(jiffies, transport->last_time_ecne_reduced +
transport->rtt)) {
transport->ssthresh = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
transport->cwnd = transport->ssthresh;
transport->last_time_ecne_reduced = jiffies;
}
@@ -567,7 +545,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* interval.
*/
transport->cwnd = max(transport->cwnd/2,
- 4*transport->asoc->pathmtu);
+ 4*asoc->pathmtu);
break;
}
@@ -652,7 +630,6 @@ void sctp_transport_reset(struct sctp_transport *t)
t->error_count = 0;
t->rto_pending = 0;
t->hb_sent = 0;
- t->fast_recovery = 0;
/* Initialize the state information for SFR-CACC */
t->cacc.changeover_active = 0;