summaryrefslogtreecommitdiffstats
path: root/net/ipv6/tcp_ipv6.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
committerIngo Molnar <mingo@kernel.org>2012-08-21 11:27:00 +0200
commitbcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch)
treee420679a5db6ea4e1694eef57f9abb6acac8d4d3 /net/ipv6/tcp_ipv6.c
parent26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff)
parent000078bc3ee69efb1124b8478c7527389a826074 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: * Fix include order for bison/flex-generated C files, from Ben Hutchings * Build fixes and documentation corrections from David Ahern * Group parsing support, from Jiri Olsa * UI/gtk refactorings and improvements from Namhyung Kim * NULL deref fix for perf script, from Namhyung Kim * Assorted cleanups from Robert Richter * Let O= makes handle relative paths, from Steven Rostedt * perf script python fixes, from Feng Tang. * Improve 'perf lock' error message when the needed tracepoints are not present, from David Ahern. * Initial bash completion support, from Frederic Weisbecker * Allow building without libelf, from Namhyung Kim. * Support DWARF CFI based unwind to have callchains when %bp based unwinding is not possible, from Jiri Olsa. * Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF section was the end goal, several fixes for code that handles all architectures and cleanups are included, from Cody Schafer. * Add a description for the JIT interface, from Andi Kleen. * Assorted fixes for Documentation and build in 32 bit, from Robert Richter * Add support for non-tracepoint events in perf script python, from Feng Tang * Cache the libtraceevent event_format associated to each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events. [ This is to reduce the surface contact with libtraceevents and make clear what is that the perf tools needs from that lib: so far parsing the common and per event fields. ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r--net/ipv6/tcp_ipv6.c236
1 files changed, 92 insertions, 144 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9df64a50b07..c66b90f71c9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
- struct inet_peer *peer = rt6_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- }
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -329,6 +315,23 @@ failure:
return err;
}
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+ struct dst_entry *dst;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ if (!dst)
+ return;
+
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_simple_retransmit(sk);
+ }
+}
+
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
@@ -356,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
+ if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
@@ -377,49 +380,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
- if (type == ICMPV6_PKT_TOOBIG) {
- struct dst_entry *dst;
-
- if (sock_owned_by_user(sk))
- goto out;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
- goto out;
-
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
-
- } else
- dst_hold(dst);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
- tcp_sync_mss(sk, dst_mtu(dst));
- tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ if (type == ICMPV6_PKT_TOOBIG) {
+ tp->mtu_info = ntohl(info);
+ if (!sock_owned_by_user(sk))
+ tcp_v6_mtu_reduced(sk);
+ else
+ set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
goto out;
}
@@ -475,62 +448,43 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi6 *fl6,
+ struct request_sock *req,
struct request_values *rvp,
u16 queue_mapping)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
- struct in6_addr * final_p, final;
- struct flowi6 fl6;
- struct dst_entry *dst;
- int err;
+ int err = -ENOMEM;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- fl6.saddr = treq->loc_addr;
- fl6.flowlabel = 0;
- fl6.flowi6_oif = treq->iif;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
+ /* First, grab a route. */
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- }
+
skb = tcp_make_synack(sk, dst, req, rvp);
- err = -ENOMEM;
+
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
- fl6.daddr = treq->rmt_addr;
+ fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return err;
}
static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
+ struct flowi6 fl6;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, req, rvp, 0);
+ return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1057,6 +1011,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
bool want_cookie = false;
if (skb->protocol == htons(ETH_P_IP))
@@ -1085,7 +1040,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
@@ -1150,8 +1105,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->iif = inet6_iif(skb);
if (!isn) {
- struct inet_peer *peer = NULL;
-
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1176,14 +1129,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, req)) != NULL &&
- (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
- ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
- &treq->rmt_addr)) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
+ (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+ if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -1192,8 +1139,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
+ !tcp_peer_is_proven(req, dst, false)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -1215,7 +1161,7 @@ have_isn:
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, req,
+ if (tcp_v6_send_synack(sk, dst, &fl6, req,
(struct request_values *)&tmp_ext,
skb_get_queue_mapping(skb)) ||
want_cookie)
@@ -1242,10 +1188,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -1302,13 +1248,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
treq = inet6_rsk(req);
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, req);
+ dst = inet6_csk_route_req(sk, &fl6, req);
if (!dst)
goto out;
}
@@ -1354,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone(treq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
consume_skb(treq->pktopts);
treq->pktopts = NULL;
if (newnp->pktoptions)
@@ -1371,11 +1317,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
@@ -1407,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
- AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+ AF_INET6, key->key, key->keylen,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
}
#endif
@@ -1422,8 +1366,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
@@ -1502,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
sock_rps_save_rxhash(sk, skb);
@@ -1734,42 +1676,47 @@ do_time_wait:
goto discard_it;
}
-static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+static void tcp_v6_early_demux(struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet_peer *peer;
+ const struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+ struct sock *sk;
- if (!rt ||
- !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
- peer = inet_getpeer_v6(&np->daddr, 1);
- *release_it = true;
- } else {
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
- *release_it = false;
- }
+ if (skb->pkt_type != PACKET_HOST)
+ return;
- return peer;
-}
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+ return;
-static void *tcp_v6_tw_get_peer(struct sock *sk)
-{
- const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
- const struct inet_timewait_sock *tw = inet_twsk(sk);
+ hdr = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
- if (tw->tw_family == AF_INET)
- return tcp_v4_tw_get_peer(sk);
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return;
- return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ inet6_iif(skb));
+ if (sk) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ struct dst_entry *dst = sk->sk_rx_dst;
+ struct inet_sock *icsk = inet_sk(sk);
+ if (dst)
+ dst = dst_check(dst, 0);
+ if (dst &&
+ icsk->rx_dst_ifindex == inet6_iif(skb))
+ skb_dst_set_noref(skb, dst);
+ }
+ }
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v6_tw_get_peer,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1778,7 +1725,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.rebuild_header = inet6_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
@@ -1810,7 +1756,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.rebuild_header = inet_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -2049,6 +1994,8 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
+ .release_cb = tcp_release_cb,
+ .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
@@ -2070,12 +2017,13 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
+ .early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.gso_send_check = tcp_v6_gso_send_check,