summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2012-08-21 11:27:00 +0200
committer Ingo Molnar <mingo@kernel.org> 2012-08-21 11:27:00 +0200
commit bcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch)
tree e420679a5db6ea4e1694eef57f9abb6acac8d4d3 /net/ipv4/tcp.c
parent 26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff)
parent 000078bc3ee69efb1124b8478c7527389a826074 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Fix include order for bison/flex-generated C files, from Ben Hutchings.
* Build fixes and documentation corrections, from David Ahern.
* Group parsing support, from Jiri Olsa.
* UI/gtk refactorings and improvements, from Namhyung Kim.
* NULL deref fix for perf script, from Namhyung Kim.
* Assorted cleanups, from Robert Richter.
* Let 'make O=' invocations handle relative paths, from Steven Rostedt.
* perf script python fixes, from Feng Tang.
* Improve the 'perf lock' error message when the needed tracepoints are not present, from David Ahern.
* Initial bash completion support, from Frederic Weisbecker.
* Allow building without libelf, from Namhyung Kim.
* Support DWARF CFI based unwinding to have callchains when %bp based unwinding is not possible, from Jiri Olsa.
* Symbol resolution fixes: while fixing support for PPC64 files with an .opd ELF section was the end goal, several fixes for code that handles all architectures, as well as cleanups, are included, from Cody Schafer.
* Add a description for the JIT interface, from Andi Kleen.
* Assorted fixes for Documentation and for the 32-bit build, from Robert Richter.
* Add support for non-tracepoint events in perf script python, from Feng Tang.
* Cache the libtraceevent event_format associated with each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events. [ This is to reduce the contact surface with libtraceevent and make clear what it is that the perf tools need from that lib: so far, parsing the common and per-event fields. ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- net/ipv4/tcp.c 77
1 files changed, 69 insertions, 8 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ba605f60e4..e7e6eeae49c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
#include <linux/slab.h>
#include <net/icmp.h>
+#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
@@ -376,6 +377,7 @@ void tcp_init_sock(struct sock *sk)
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
+ INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
@@ -796,6 +798,10 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
inet_csk(sk)->icsk_ext_hdr_len -
tp->tcp_header_len);
+ /* TSQ : try to have two TSO segments in flight */
+ xmit_size_goal = min_t(u32, xmit_size_goal,
+ sysctl_tcp_limit_output_bytes >> 1);
+
xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
/* We try hard to avoid divides here */
@@ -977,26 +983,67 @@ static inline int select_size(const struct sock *sk, bool sg)
return tmp;
}
+void tcp_free_fastopen_req(struct tcp_sock *tp)
+{
+ if (tp->fastopen_req != NULL) {
+ kfree(tp->fastopen_req);
+ tp->fastopen_req = NULL;
+ }
+}
+
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int err, flags;
+
+ if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+ return -EOPNOTSUPP;
+ if (tp->fastopen_req != NULL)
+ return -EALREADY; /* Another Fast Open is in progress */
+
+ tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
+ sk->sk_allocation);
+ if (unlikely(tp->fastopen_req == NULL))
+ return -ENOBUFS;
+ tp->fastopen_req->data = msg;
+
+ flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
+ err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ msg->msg_namelen, flags);
+ *size = tp->fastopen_req->copied;
+ tcp_free_fastopen_req(tp);
+ return err;
+}
+
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
{
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int iovlen, flags, err, copied;
- int mss_now = 0, size_goal;
+ int iovlen, flags, err, copied = 0;
+ int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
bool sg;
long timeo;
lock_sock(sk);
flags = msg->msg_flags;
+ if (flags & MSG_FASTOPEN) {
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ if (err == -EINPROGRESS && copied_syn > 0)
+ goto out;
+ else if (err)
+ goto out_err;
+ offset = copied_syn;
+ }
+
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
/* Wait for a connection to finish. */
if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
- goto out_err;
+ goto do_error;
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -1032,6 +1079,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
unsigned char __user *from = iov->iov_base;
iov++;
+ if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */
+ if (offset >= seglen) {
+ offset -= seglen;
+ continue;
+ }
+ seglen -= offset;
+ from += offset;
+ offset = 0;
+ }
while (seglen > 0) {
int copy = 0;
@@ -1194,7 +1250,7 @@ out:
if (copied && likely(!tp->repair))
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
- return copied;
+ return copied + copied_syn;
do_fault:
if (!skb->len) {
@@ -1207,7 +1263,7 @@ do_fault:
}
do_error:
- if (copied)
+ if (copied + copied_syn)
goto out;
out_err:
err = sk_stream_error(sk, flags, err);
@@ -2625,7 +2681,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Cap the max timeout in ms TCP will retry/retrans
* before giving up and aborting (ETIMEDOUT) a connection.
*/
- icsk->icsk_user_timeout = msecs_to_jiffies(val);
+ if (val < 0)
+ err = -EINVAL;
+ else
+ icsk->icsk_user_timeout = msecs_to_jiffies(val);
break;
default:
err = -ENOPROTOOPT;
@@ -3310,8 +3369,7 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
#endif
-/**
- * Each Responder maintains up to two secret values concurrently for
+/* Each Responder maintains up to two secret values concurrently for
* efficient secret rollover. Each secret value has 4 states:
*
* Generating. (tcp_secret_generating != tcp_secret_primary)
@@ -3563,6 +3621,8 @@ void __init tcp_init(void)
pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
+ tcp_metrics_init();
+
tcp_register_congestion_control(&tcp_reno);
memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
@@ -3573,4 +3633,5 @@ void __init tcp_init(void)
tcp_secret_primary = &tcp_secret_one;
tcp_secret_retiring = &tcp_secret_two;
tcp_secret_secondary = &tcp_secret_two;
+ tcp_tasklet_init();
}