summary refs log tree commit diff stats
path: root/net/ipv6/tcp_ipv6.c
diff options
context:
space:
mode:
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 2012-09-12 11:14:33 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 2012-09-12 11:14:33 -0400
commit 25a765b7f05cb8460fa01b54568894b20e184862 (patch)
tree 0b56db57b4d9f912393ab303c269e0fe6cdf8635 /net/ipv6/tcp_ipv6.c
parent 9d2be9287107695708e6aae5105a8a518a6cb4d0 (diff)
parent 64282278989d5b0398dcb3ba7904cb00c621dc35 (diff)
Merge branch 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.7
* 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (9690 commits)
  x86: Document x86_init.paging.pagetable_init()
  x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done()
  x86: Move paging_init() call to x86_init.paging.pagetable_init()
  x86: Rename pagetable_setup_start() to pagetable_init()
  x86: Remove base argument from x86_init.paging.pagetable_setup_start
  Linux 3.6-rc5
  HID: tpkbd: work even if the new Lenovo Keyboard driver is not configured
  Remove user-triggerable BUG from mpol_to_str
  xen/pciback: Fix proper FLR steps.
  uml: fix compile error in deliver_alarm()
  dj: memory scribble in logi_dj
  Fix order of arguments to compat_put_time[spec|val]
  xen: Use correct masking in xen_swiotlb_alloc_coherent.
  xen: fix logical error in tlb flushing
  xen/p2m: Fix one-off error in checking the P2M tree directory.
  powerpc: Don't use __put_user() in patch_instruction
  powerpc: Make sure IPI handlers see data written by IPI senders
  powerpc: Restore correct DSCR in context switch
  powerpc: Fix DSCR inheritance in copy_thread()
  powerpc: Keep thread.dscr and thread.dscr_inherit in sync
  ...
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r-- net/ipv6/tcp_ipv6.c 261
1 files changed, 117 insertions, 144 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9df64a50b07..a3e60cc04a8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
}
#endif
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
static void tcp_v6_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
@@ -277,22 +289,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
- struct inet_peer *peer = rt6_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- }
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
@@ -329,6 +327,23 @@ failure:
return err;
}
+static void tcp_v6_mtu_reduced(struct sock *sk)
+{
+ struct dst_entry *dst;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ if (!dst)
+ return;
+
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_simple_retransmit(sk);
+ }
+}
+
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
@@ -356,7 +371,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
+ if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
@@ -377,49 +392,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
- if (type == ICMPV6_PKT_TOOBIG) {
- struct dst_entry *dst;
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (sock_owned_by_user(sk))
- goto out;
- if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
- goto out;
-
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi6 fl6;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
- if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
- goto out;
- }
-
- } else
- dst_hold(dst);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
- tcp_sync_mss(sk, dst_mtu(dst));
- tcp_simple_retransmit(sk);
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
+ if (type == ICMPV6_PKT_TOOBIG) {
+ tp->mtu_info = ntohl(info);
+ if (!sock_owned_by_user(sk))
+ tcp_v6_mtu_reduced(sk);
+ else
+ set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
goto out;
}
@@ -475,62 +460,43 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi6 *fl6,
+ struct request_sock *req,
struct request_values *rvp,
u16 queue_mapping)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
- struct in6_addr * final_p, final;
- struct flowi6 fl6;
- struct dst_entry *dst;
- int err;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = treq->rmt_addr;
- fl6.saddr = treq->loc_addr;
- fl6.flowlabel = 0;
- fl6.flowi6_oif = treq->iif;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, flowi6_to_flowi(&fl6));
-
- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
+ int err = -ENOMEM;
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
+ /* First, grab a route. */
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- }
+
skb = tcp_make_synack(sk, dst, req, rvp);
- err = -ENOMEM;
+
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
- fl6.daddr = treq->rmt_addr;
+ fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return err;
}
static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
struct request_values *rvp)
{
+ struct flowi6 fl6;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return tcp_v6_send_synack(sk, req, rvp, 0);
+ return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1057,6 +1023,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
bool want_cookie = false;
if (skb->protocol == htons(ETH_P_IP))
@@ -1085,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
@@ -1150,8 +1117,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
treq->iif = inet6_iif(skb);
if (!isn) {
- struct inet_peer *peer = NULL;
-
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1176,14 +1141,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, req)) != NULL &&
- (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
- ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
- &treq->rmt_addr)) {
- inet_peer_refcheck(peer);
- if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
- (s32)(peer->tcp_ts - req->ts_recent) >
- TCP_PAWS_WINDOW) {
+ (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
+ if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
@@ -1192,8 +1151,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
- (!peer || !peer->tcp_ts_stamp) &&
- (!dst || !dst_metric(dst, RTAX_RTT))) {
+ !tcp_peer_is_proven(req, dst, false)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -1215,7 +1173,7 @@ have_isn:
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, req,
+ if (tcp_v6_send_synack(sk, dst, &fl6, req,
(struct request_values *)&tmp_ext,
skb_get_queue_mapping(skb)) ||
want_cookie)
@@ -1242,10 +1200,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -1302,13 +1260,12 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
treq = inet6_rsk(req);
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, req);
+ dst = inet6_csk_route_req(sk, &fl6, req);
if (!dst)
goto out;
}
@@ -1325,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(newsk, dst, NULL, NULL);
+ inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1354,7 +1312,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone(treq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
consume_skb(treq->pktopts);
treq->pktopts = NULL;
if (newnp->pktoptions)
@@ -1371,11 +1330,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
@@ -1407,7 +1363,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
- AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+ AF_INET6, key->key, key->keylen,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
}
#endif
@@ -1422,8 +1379,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
@@ -1502,10 +1457,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ dst_release(dst);
+ sk->sk_rx_dst = NULL;
+ }
+ }
+
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
if (opt_skb)
@@ -1734,51 +1699,56 @@ do_time_wait:
goto discard_it;
}
-static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+static void tcp_v6_early_demux(struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet_peer *peer;
+ const struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+ struct sock *sk;
- if (!rt ||
- !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
- peer = inet_getpeer_v6(&np->daddr, 1);
- *release_it = true;
- } else {
- if (!rt->rt6i_peer)
- rt6_bind_peer(rt, 1);
- peer = rt->rt6i_peer;
- *release_it = false;
- }
+ if (skb->pkt_type != PACKET_HOST)
+ return;
- return peer;
-}
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+ return;
-static void *tcp_v6_tw_get_peer(struct sock *sk)
-{
- const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
- const struct inet_timewait_sock *tw = inet_twsk(sk);
+ hdr = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
- if (tw->tw_family == AF_INET)
- return tcp_v4_tw_get_peer(sk);
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return;
- return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ inet6_iif(skb));
+ if (sk) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ struct dst_entry *dst = sk->sk_rx_dst;
+ struct inet_sock *icsk = inet_sk(sk);
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst &&
+ icsk->rx_dst_ifindex == skb->skb_iif)
+ skb_dst_set_noref(skb, dst);
+ }
+ }
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v6_tw_get_peer,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
+ .sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
@@ -1808,9 +1778,9 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -2049,6 +2019,8 @@ struct proto tcpv6_prot = {
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
+ .release_cb = tcp_release_cb,
+ .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
@@ -2070,12 +2042,13 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
+ .early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.gso_send_check = tcp_v6_gso_send_check,