From ea84e5555a56e2013687b19b012bcf959d601128 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Tue, 27 Oct 2009 03:27:21 +0000 Subject: net: Corrected spelling error heurestics->heuristics Corrected a spelling error in a function name. Signed-off-by: Andreas Petlund Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d86784be7ab..a0c3700bae3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heurestics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(struct tcp_sock *tp) { return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } @@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk) return 1; /* Not-A-Trick#2 : Classic rule... */ - if (tcp_dupack_heurestics(tp) > tp->reordering) + if (tcp_dupack_heuristics(tp) > tp->reordering) return 1; /* Trick#3 : when we use RFC2988 timer restart, fast -- cgit v1.2.3-70-g09d2 From 022c3f7d82f0f1c68018696f2f027b87b9bb45c2 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:22 +0000 Subject: Allow tcp_parse_options to consult dst entry We need tcp_parse_options to be aware of dst_entry to take into account per dst_entry TCP options settings Signed-off-by: Gilad Ben-Yossef Signed-off-by: Ori Finkelman Signed-off-by: Yony Amit Signed-off-by: David S.
Miller --- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 27 ++++++++++++++------------- net/ipv4/tcp_input.c | 9 ++++++--- net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- net/ipv4/tcp_minisocks.c | 7 +++++-- net/ipv6/syncookies.c | 28 +++++++++++++++------------- net/ipv6/tcp_ipv6.c | 3 ++- 7 files changed, 56 insertions(+), 42 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 03a49c70337..740d09be8e2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -409,7 +409,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab); + int estab, + struct dst_entry *dst); extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5ec678ad70e..3146cc40174 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,13 +276,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -298,12 +291,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? 
tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -352,6 +339,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a0c3700bae3..c7625005486 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3698,12 +3698,14 @@ old_ack: * the fast version below fails. 
*/ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab) + int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); + BUG_ON(!estab && !dst); + ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; @@ -3820,7 +3822,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); return 1; } @@ -5364,8 +5366,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; + struct dst_entry *dst = __sk_dst_get(sk); - tcp_parse_options(skb, &tp->rx_opt, 0); + tcp_parse_options(skb, &tp->rx_opt, 0, dst); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4a3390a528..657ae334f12 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1257,11 +1257,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + + dst = inet_csk_route_req(sk, req); + if(!dst) + goto drop_and_free; + tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = 536; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1270,14 +1280,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; 
- ireq->opt = tcp_v4_save_options(sk, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; + goto drop_and_release; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1302,7 +1306,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 015e7c67dc8..463d51b53d3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -102,7 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1); + tcp_parse_options(skb, &tmp_opt, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -500,10 +500,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int paws_reject = 0; struct tcp_options_received tmp_opt; struct sock *child; + struct dst_entry *dst = inet_csk_route_req(sk, req); tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -516,6 +517,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, } } + dst_release(dst); + /* Check for pure retransmitted SYN. 
*/ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN && diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index c46da533888..612fc53e0bb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -184,13 +184,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -224,12 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -265,6 +252,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->window_clamp = tp->window_clamp ? 
:dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c54ec3615de..34925f089e0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1167,6 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1205,7 +1206,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); -- cgit v1.2.3-70-g09d2 From 1aba721eba1d84a2defce45b950272cee1e6c72a Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:24 +0000 Subject: Add the no SACK route option feature Implement querying and acting upon the no sack bit in the features field. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Ori Finkelman Signed-off-by: Yony Amit Signed-off-by: David S.
Miller --- include/linux/rtnetlink.h | 2 +- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index adf2068d12b..9c802a6b04d 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -377,7 +377,7 @@ enum #define RTAX_MAX (__RTAX_MAX - 1) #define RTAX_FEATURE_ECN 0x00000001 -#define RTAX_FEATURE_SACK 0x00000002 +#define RTAX_FEATURE_NO_SACK 0x00000002 #define RTAX_FEATURE_TIMESTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c7625005486..5fb25f97745 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3763,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2e2eb74ac4c..b35802af3c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -498,7 +499,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->options |= OPTION_WSCALE; size += TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) size += TCPOLEN_SACKPERM_ALIGNED; -- cgit v1.2.3-70-g09d2 From cda42ebd67ee5fdf09d7057b5a4584d36fe8a335 Mon Sep 
17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:25 +0000 Subject: Allow disabling TCP timestamp options per route Implement querying and acting upon the no timestamp bit in the feature field. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Ori Finkelman Signed-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9c802a6b04d..2ab8c758b46 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -378,7 +378,7 @@ enum #define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_NO_SACK 0x00000002 -#define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 struct rta_session diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5fb25f97745..6097491aa9f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3755,7 +3755,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b35802af3c4..8819eba8ebb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -488,7 +488,9 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); size += TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && + *md5 == NULL)) {
opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; @@ -2317,7 +2319,9 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sysctl_tcp_timestamps && + (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? + TCPOLEN_TSTAMP_ALIGNED : 0)); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) -- cgit v1.2.3-70-g09d2 From 345cda2fd695534be5a4494f1b59da9daed33663 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:26 +0000 Subject: Allow to turn off TCP window scale opt per route Add and use no window scale bit in the features field. Note that this is not the same as setting a window scale of 0 as would happen with window limit on route. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Ori Finkelman Signed-off-by: Yony Amit Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2ab8c758b46..6784b342cbb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -380,6 +380,7 @@ enum #define RTAX_FEATURE_NO_SACK 0x00000002 #define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 +#define RTAX_FEATURE_NO_WSCALE 0x00000010 struct rta_session { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6097491aa9f..393c56921dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3739,7 +3739,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && sysctl_tcp_window_scaling && + !dst_feature(dst,
RTAX_FEATURE_NO_WSCALE)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8819eba8ebb..616c686ca25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -496,7 +496,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; size += TCPOLEN_WSCALE_ALIGNED; @@ -2347,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + (sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; -- cgit v1.2.3-70-g09d2 From dc343475ed062e13fc260acccaab91d7d80fd5b2 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 28 Oct 2009 04:15:27 +0000 Subject: Allow disabling of DSACK TCP option per route Add and use no DSACK bit in the features field. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Ori Finkelman Signed-off-by: Yony Amit Signed-off-by: David S.
Miller --- include/linux/rtnetlink.h | 1 + net/ipv4/tcp_input.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6784b342cbb..e78b60cd65a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -381,6 +381,7 @@ enum #define RTAX_FEATURE_NO_TSTAMP 0x00000004 #define RTAX_FEATURE_ALLFRAG 0x00000008 #define RTAX_FEATURE_NO_WSCALE 0x00000010 +#define RTAX_FEATURE_NO_DSACK 0x00000020 struct rta_session { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 393c56921dc..ba0eab65fe8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4080,8 +4080,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4110,13 +4112,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) -- cgit v1.2.3-70-g09d2 From 6a2a2d6bf8581216e08be15fcb563cfd6c430e1e Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 4 Nov 2009 23:23:10 -0800 Subject: tcp: Use defaults when no route options are available Trying to parse the 
option of a SYN packet that we have no route entry for should just use global wide defaults for route entry options. Signed-off-by: Gilad Ben-Yossef Tested-by: Valdis.Kletnieks@vt.edu Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/ipv4/tcp_input.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/dst.h b/include/net/dst.h index 39c4a5963e1..387cb3cfde7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric) static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { - return dst_metric(dst, RTAX_FEATURES) & feature; + return (dst ? dst_metric(dst, RTAX_FEATURES) & feature : 0); } static inline u32 dst_mtu(const struct dst_entry *dst) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ba0eab65fe8..be0c5bf7bfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3704,8 +3704,6 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); - BUG_ON(!estab && !dst); - ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; -- cgit v1.2.3-70-g09d2 From bee7ca9ec03a26676ea2b1c28dc4039348eff3e1 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Tue, 10 Nov 2009 09:51:18 +0000 Subject: net: TCP_MSS_DEFAULT, TCP_MSS_DESIRED Define two symbols needed in both kernel and user space. Remove old (somewhat incorrect) kernel variant that wasn't used in most cases. Default should apply to both RMSS and SMSS (RFC2581). Replace numeric constants with defined symbols. Stand-alone patch, originally developed for TCPCT. Signed-off-by: William.Allen.Simpson@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 3 --- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eeecb8547a2..32d7d77b4a0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -81,6 +81,12 @@ enum { TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) }; +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + /* TCP socket options */ #define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ #define TCP_MAXSEG 2 /* Limit MSS */ diff --git a/include/net/tcp.h b/include/net/tcp.h index bf20f88fd03..325bfcf5c93 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -62,9 +62,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U -/* Minimal RCV_MSS. */ -#define TCP_MIN_RCVMSS 536U - /* The least MTU to use for probing */ #define TCP_BASE_MSS 512 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be0c5bf7bfc..cc306ac6eb5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) * "len" is invariant segment length, including TCP header. */ len += skb->data - skb_transport_header(skb); - if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || + if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) || /* If PSH is not set, packet should be * full sized, provided peer TCP is not badly broken. 
* This observation (if it is correct 8)) allows @@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); - hint = min(hint, TCP_MIN_RCVMSS); + hint = min(hint, TCP_MSS_DEFAULT); hint = max(hint, TCP_MIN_MSS); inet_csk(sk)->icsk_ack.rcv_mss = hint; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 657ae334f12..cf7f2086e6e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -217,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->opt) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; - tp->rx_opt.mss_clamp = 536; + tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; /* Socket identity is still unknown (sport may be zero). * However we set state to SYN-SENT and not releasing socket @@ -1268,7 +1268,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = 536; + tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; tcp_parse_options(skb, &tmp_opt, 0, dst); @@ -1815,7 +1815,7 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; icsk->icsk_ca_ops = &tcp_init_congestion_ops; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9d34e224cb..4be22280e6b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -476,7 +476,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (newtp->af_specific->md5_lookup(sk, newsk)) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif - if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) + if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp 
= req->mss; TCP_ECN_openreq_child(newtp, req); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 696a22f034e..de709091b26 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1851,7 +1851,7 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; -- cgit v1.2.3-70-g09d2 From 9d4fb27db90043cd2640e4bc778f9c755d3c17c1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 23 Nov 2009 10:41:23 -0800 Subject: net/ipv4: Move && and || to end of previous line On Sun, 2009-11-22 at 16:31 -0800, David Miller wrote: > It should be of the form: > if (x && > y) > > or: > if (x && y) > > Fix patches, rather than complaints, for existing cases where things > do not follow this pattern are certainly welcome. Also collapsed some multiple tabs to single space. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/inet_lro.c | 36 ++++++++++++++++++------------------ net/ipv4/ip_output.c | 6 ++++-- net/ipv4/ipconfig.c | 13 ++++++------- net/ipv4/route.c | 19 ++++++++++--------- net/ipv4/tcp_htcp.c | 10 +++++----- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_lp.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/tcp_probe.c | 4 ++-- net/ipv4/tcp_veno.c | 5 ++--- net/ipv4/tcp_yeah.c | 4 ++-- net/ipv4/udp.c | 36 ++++++++++++++++++------------------ 12 files changed, 73 insertions(+), 72 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 6a667dae315..47038cb6c13 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -64,15 +64,15 @@ static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, if (iph->ihl != IPH_LEN_WO_OPTIONS) return -1; - if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack - || tcph->rst || tcph->syn || tcph->fin) + if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || + tcph->rst || tcph->syn || 
tcph->fin) return -1; if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) return -1; - if (tcph->doff != TCPH_LEN_WO_OPTIONS - && tcph->doff != TCPH_LEN_W_TIMESTAMP) + if (tcph->doff != TCPH_LEN_WO_OPTIONS && + tcph->doff != TCPH_LEN_W_TIMESTAMP) return -1; /* check tcp options (only timestamp allowed) */ @@ -262,10 +262,10 @@ static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, struct iphdr *iph, struct tcphdr *tcph) { - if ((lro_desc->iph->saddr != iph->saddr) - || (lro_desc->iph->daddr != iph->daddr) - || (lro_desc->tcph->source != tcph->source) - || (lro_desc->tcph->dest != tcph->dest)) + if ((lro_desc->iph->saddr != iph->saddr) || + (lro_desc->iph->daddr != iph->daddr) || + (lro_desc->tcph->source != tcph->source) || + (lro_desc->tcph->dest != tcph->dest)) return -1; return 0; } @@ -339,9 +339,9 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, u64 flags; int vlan_hdr_len = 0; - if (!lro_mgr->get_skb_header - || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, - &flags, priv)) + if (!lro_mgr->get_skb_header || + lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, + &flags, priv)) goto out; if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) @@ -351,8 +351,8 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, if (!lro_desc) goto out; - if ((skb->protocol == htons(ETH_P_8021Q)) - && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) + if ((skb->protocol == htons(ETH_P_8021Q)) && + !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; if (!lro_desc->active) { /* start new lro session */ @@ -446,9 +446,9 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, int hdr_len = LRO_MAX_PG_HLEN; int vlan_hdr_len = 0; - if (!lro_mgr->get_frag_header - || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, - (void *)&tcph, &flags, priv)) { + if (!lro_mgr->get_frag_header || + lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, + (void *)&tcph, &flags, 
priv)) { mac_hdr = page_address(frags->page) + frags->page_offset; goto out1; } @@ -472,8 +472,8 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, if (!skb) goto out; - if ((skb->protocol == htons(ETH_P_8021Q)) - && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) + if ((skb->protocol == htons(ETH_P_8021Q)) && + !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; iph = (void *)(skb->data + vlan_hdr_len); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 322b40864ac..b78e61502ef 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -264,9 +264,11 @@ int ip_mc_output(struct sk_buff *skb) This check is duplicated in ip_mr_input at the moment. */ - && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED)) + && + ((rt->rt_flags & RTCF_LOCAL) || + !(IPCB(skb)->flags & IPSKB_FORWARDED)) #endif - ) { + ) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f8d04c25645..4e08b7f2331 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1172,10 +1172,9 @@ static int __init ic_dynamic(void) schedule_timeout_uninterruptible(1); #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. 
*/ - if ((ic_got_reply & IC_BOOTP) - && (ic_proto_enabled & IC_USE_DHCP) - && ic_dhcp_msgtype != DHCPACK) - { + if ((ic_got_reply & IC_BOOTP) && + (ic_proto_enabled & IC_USE_DHCP) && + ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; printk(","); continue; @@ -1344,9 +1343,9 @@ static int __init ip_auto_config(void) */ if (ic_myaddr == NONE || #ifdef CONFIG_ROOT_NFS - (root_server_addr == NONE - && ic_servaddr == NONE - && ROOT_DEV == Root_NFS) || + (root_server_addr == NONE && + ic_servaddr == NONE && + ROOT_DEV == Root_NFS) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4284ceef794..7547944ea9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1346,9 +1346,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, return; net = dev_net(dev); - if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) - || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) - || ipv4_is_zeronet(new_gw)) + if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || + ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || + ipv4_is_zeronet(new_gw)) goto reject_redirect; if (!rt_caching(net)) @@ -2311,10 +2311,11 @@ skip_cache: ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE - || (!ipv4_is_local_multicast(daddr) && - IN_DEV_MFORWARD(in_dev)) + || + (!ipv4_is_local_multicast(daddr) && + IN_DEV_MFORWARD(in_dev)) #endif - ) { + ) { rcu_read_unlock(); return ip_route_input_mc(skb, daddr, saddr, tos, dev, our); @@ -2511,9 +2512,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, of another iface. 
--ANK */ - if (oldflp->oif == 0 - && (ipv4_is_multicast(oldflp->fl4_dst) || - oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + if (oldflp->oif == 0 && + (ipv4_is_multicast(oldflp->fl4_dst) || + oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = ip_dev_find(net, oldflp->fl4_src); if (dev_out == NULL) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 26d5c7fc7de..7c94a495541 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -92,8 +92,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) if (icsk->icsk_ca_state == TCP_CA_Open) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; - if (ca->maxRTT < srtt - && srtt <= ca->maxRTT + msecs_to_jiffies(20)) + if (ca->maxRTT < srtt && + srtt <= ca->maxRTT + msecs_to_jiffies(20)) ca->maxRTT = srtt; } } @@ -123,9 +123,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt ca->packetcount += pkts_acked; - if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) - && now - ca->lasttime >= ca->minRTT - && ca->minRTT > 0) { + if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && + now - ca->lasttime >= ca->minRTT && + ca->minRTT > 0) { __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); if (htcp_ccount(ca) <= 3) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc306ac6eb5..be166e0e11c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4852,11 +4852,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) struct tcp_sock *tp = tcp_sk(sk); /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss + if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). Or... */ - && __tcp_select_window(sk) >= tp->rcv_wnd) || + __tcp_select_window(sk) >= tp->rcv_wnd) || /* We ACK each frame or... 
*/ tcp_in_quickack_mode(sk) || /* We have out of order data. */ diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index ce3c41ff50b..de870377fbb 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -143,8 +143,8 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) goto out; /* we can't calc remote HZ with no different!! */ - if (tp->rx_opt.rcv_tsval == lp->remote_ref_time - || tp->rx_opt.rcv_tsecr == lp->local_ref_time) + if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || + tp->rx_opt.rcv_tsecr == lp->local_ref_time) goto out; m = HZ * (tp->rx_opt.rcv_tsval - diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 616c686ca25..875bc6dcd92 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1923,8 +1923,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) - && TCP_SKB_CB(skb)->seq != tp->snd_una) + if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && + TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; if (skb->len > cur_mss) { diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 7a3cc2ffad8..bb110c5ce1d 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -95,8 +95,8 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Only update if port matches */ if ((port == 0 || ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port) - && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { + ntohs(inet->inet_sport) == port) && + (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { spin_lock(&tcp_probe.lock); /* If log fills, just silently drop */ diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index e9bbff74648..b612acf7618 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -165,9 +165,8 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * every other rtt. 
*/ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc - && tp->snd_cwnd < - tp->snd_cwnd_clamp) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { tp->snd_cwnd++; veno->inc = 0; } else diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 66b6821b984..a0f24035889 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -157,8 +157,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (queue > TCP_YEAH_ALPHA || rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { - if (queue > TCP_YEAH_ALPHA - && tp->snd_cwnd > yeah->reno_count) { + if (queue > TCP_YEAH_ALPHA && + tp->snd_cwnd > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , tp->snd_cwnd >> TCP_YEAH_EPSILON); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1eaf57567eb..1f9534846ca 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -136,12 +136,12 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_nulls_node *node; sk_nulls_for_each(sk2, node, &hslot->head) - if (net_eq(sock_net(sk2), net) && - sk2 != sk && + if (net_eq(sock_net(sk2), net) && + sk2 != sk && (bitmap || udp_sk(sk2)->udp_port_hash == num) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (*saddr_comp)(sk, sk2)) { if (bitmap) __set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -168,12 +168,12 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, spin_lock(&hslot2->lock); udp_portaddr_for_each_entry(sk2, node, &hslot2->head) - if (net_eq(sock_net(sk2), net) && - sk2 != sk && - (udp_sk(sk2)->udp_port_hash == num) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + 
(udp_sk(sk2)->udp_port_hash == num) && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (*saddr_comp)(sk, sk2)) { res = 1; break; @@ -545,13 +545,13 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (!net_eq(sock_net(s), net) || - udp_sk(s)->udp_port_hash != hnum || - (inet->inet_daddr && inet->inet_daddr != rmt_addr) || - (inet->inet_dport != rmt_port && inet->inet_dport) || - (inet->inet_rcv_saddr && - inet->inet_rcv_saddr != loc_addr) || - ipv6_only_sock(s) || + if (!net_eq(sock_net(s), net) || + udp_sk(s)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || + (inet->inet_rcv_saddr && + inet->inet_rcv_saddr != loc_addr) || + ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) -- cgit v1.2.3-70-g09d2 From 4957faade11b3a278c3b3cade3411ddc20afa791 Mon Sep 17 00:00:00 2001 From: William Allen Simpson Date: Wed, 2 Dec 2009 18:25:27 +0000 Subject: TCPCT part 1g: Responder Cookie => Initiator Parse incoming TCP_COOKIE option(s). Calculate TCP_COOKIE option. Send optional data. 
This is a significantly revised implementation of an earlier (year-old) patch that no longer applies cleanly, with permission of the original author (Adam Langley): http://thread.gmane.org/gmane.linux.network/102586 Requires: TCPCT part 1a: add request_values parameter for sending SYNACK TCPCT part 1b: generate Responder Cookie secret TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS TCPCT part 1d: define TCP cookie option, extend existing struct's TCPCT part 1e: implement socket option TCP_COOKIE_TRANSACTIONS TCPCT part 1f: Initiator Cookie => Responder Signed-off-by: William.Allen.Simpson@gmail.com Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/syncookies.c | 5 ++- net/ipv4/tcp_input.c | 75 +++++++++++++++++++++++++++++----- net/ipv4/tcp_ipv4.c | 47 +++++++++++++++++++-- net/ipv4/tcp_minisocks.c | 14 ++++--- net/ipv4/tcp_output.c | 103 +++++++++++++++++++++++++++++++++++++++-------- net/ipv6/syncookies.c | 5 ++- net/ipv6/tcp_ipv6.c | 52 +++++++++++++++++++++++- 8 files changed, 259 insertions(+), 43 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index f9abd9becab..28b04ff8c96 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -407,6 +407,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, + u8 **hvpp, int estab, struct dst_entry *dst); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3146cc40174..26399ad2a28 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, 
struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -341,7 +342,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be166e0e11c..57ae96a0422 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3698,7 +3698,7 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab, struct dst_entry *dst) + u8 **hvpp, int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3785,7 +3785,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, */ break; #endif - } + case TCPOPT_COOKIE: + /* This option is variable length. + */ + switch (opsize) { + case TCPOLEN_COOKIE_BASE: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_PAIR: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_MIN+0: + case TCPOLEN_COOKIE_MIN+2: + case TCPOLEN_COOKIE_MIN+4: + case TCPOLEN_COOKIE_MIN+6: + case TCPOLEN_COOKIE_MAX: + /* 16-bit multiple */ + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + default: + /* ignore option */ + break; + }; + break; + }; ptr += opsize-2; length -= opsize; @@ -3813,17 +3836,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) * If it is wrong it falls back on tcp_parse_options(). 
*/ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) + struct tcp_sock *tp, u8 **hvpp) { - if (th->doff == sizeof(struct tcphdr) >> 2) { + /* In the spirit of fast parsing, compare doff directly to constant + * values. Because equality is used, short doff can be ignored here. + */ + if (th->doff == (sizeof(*th) / 4)) { tp->rx_opt.saw_tstamp = 0; return 0; } else if (tp->rx_opt.tstamp_ok && - th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return 1; } @@ -5077,10 +5103,12 @@ out: static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, int syn_inerr) { + u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp, &hash_location) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5368,12 +5396,14 @@ discard: static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { - struct tcp_sock *tp = tcp_sk(sk); + u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); - int saved_clamp = tp->rx_opt.mss_clamp; + struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_cookie_values *cvp = tp->cookie_values; + int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, 0, dst); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); if (th->ack) { /* rfc793: @@ -5470,6 +5500,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * 
is initialized. */ tp->copied_seq = tp->rcv_nxt; + + if (cvp != NULL && + cvp->cookie_pair_size > 0 && + tp->rx_opt.cookie_plus > 0) { + int cookie_size = tp->rx_opt.cookie_plus + - TCPOLEN_COOKIE_BASE; + int cookie_pair_size = cookie_size + + cvp->cookie_desired; + + /* A cookie extension option was sent and returned. + * Note that each incoming SYNACK replaces the + * Responder cookie. The initial exchange is most + * fragile, as protection against spoofing relies + * entirely upon the sequence and timestamp (above). + * This replacement strategy allows the correct pair to + * pass through, while any others will be filtered via + * Responder verification later. + */ + if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { + memcpy(&cvp->cookie_pair[cvp->cookie_desired], + hash_location, cookie_size); + cvp->cookie_pair_size = cookie_pair_size; + } + } + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2bcac9b388..59c911f3889 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1213,9 +1213,12 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; + struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; @@ -1271,15 +1274,49 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + 
u8 *c; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_release; + + /* Secret recipe starts with IP addresses */ + *mess++ ^= daddr; + *mess++ ^= saddr; - tcp_parse_options(skb, &tmp_opt, 0, dst); + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; + +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_release; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); if (security_inet_conn_request(sk, skb, req)) @@ -1339,7 +1376,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, NULL) || want_cookie) + if (__tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 96852af43ca..87accec8d09 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -90,13 +90,14 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { - struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; + u8 *hash_location; + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; if (th->doff > (sizeof(*th) >> 2) && 
tcptw->tw_ts_recent_stamp) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -518,15 +519,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - struct tcp_options_received tmp_opt; - struct sock *child; - if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) { + if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { tmp_opt.tstamp_ok = 1; - tcp_parse_options(skb, &tmp_opt, 1, NULL); + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 35dd983a8a9..2ac8beff4d7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -655,48 +655,77 @@ static unsigned tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { - unsigned size = 0; + struct tcp_md5sig_key **md5, + struct tcp_extend_values *xvp) +{ struct inet_request_sock *ireq = inet_rsk(req); - char doing_ts; + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? + xvp->cookie_plus : + 0; + bool doing_ts = ireq->tstamp_ok; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + + /* We can't fit any SACK blocks in a packet with MD5 + TS + * options. 
There was discussion about disabling SACK + * rather than TS in order to fit in better with old, + * buggy kernels, but that was deemed to be unnecessary. + */ + doing_ts &= !ireq->sack_ok; } #else *md5 = NULL; #endif - /* we can't fit any SACK blocks in a packet with MD5 + TS - options. There was discussion about disabling SACK rather than TS in - order to fit in better with old, buggy kernels, but that was deemed - to be unnecessary. */ - doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); - + /* We always send an MSS option. */ opts->mss = mss; - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = req->ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!doing_ts)) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Similar rationale to tcp_syn_options() applies here, too. + * If the options fit, the same options should fit now! + */ + if (*md5 == NULL && + doing_ts && + cookie_plus > TCPOLEN_COOKIE_BASE) { + int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + } + if (need <= remaining) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; + remaining -= need; + } else { + /* There's no error return, so flag it. */ + xvp->cookie_out_never = 1; /* true */ + opts->hash_size = 0; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Compute TCP options for ESTABLISHED sockets. 
This is not the @@ -2365,6 +2394,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_values *rvp) { struct tcp_out_options opts; + struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; @@ -2408,8 +2438,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5) + - sizeof(struct tcphdr); + skb, &opts, &md5, xvp) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2426,6 +2456,45 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + + if (OPTION_COOKIE_EXTENSION & opts.options) { + const struct tcp_cookie_values *cvp = tp->cookie_values; + + if (cvp != NULL && + cvp->s_data_constant && + cvp->s_data_desired > 0) { + u8 *buf = skb_put(skb, cvp->s_data_desired); + + /* copy data directly from the listening socket. */ + memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); + TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + } + + if (opts.hash_size > 0) { + __u32 workspace[SHA_WORKSPACE_WORDS]; + u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; + u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; + + /* Secret recipe depends on the Timestamp, (future) + * Sequence and Acknowledgment Numbers, Initiator + * Cookie, and others handled by IP variant caller. 
+ */ + *tail-- ^= opts.tsval; + *tail-- ^= tcp_rsk(req)->rcv_isn + 1; + *tail-- ^= TCP_SKB_CB(skb)->seq + 1; + + /* recommended */ + *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (u32)cvp; /* per sockopt */ + + sha_transform((__u32 *)&xvp->cookie_bakery[0], + (char *)mess, + &workspace[0]); + opts.hash_location = + (__u8 *)&xvp->cookie_bakery[0]; + } + } + th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 612fc53e0bb..5b9af508b8f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f2ec38289a4..fc0a4e5895e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1162,7 +1162,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -1206,8 +1208,52 
@@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *d; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_free; + + /* Secret recipe starts with IP addresses */ + d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; - tcp_parse_options(skb, &tmp_opt, 0, dst); +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. 
*/ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_free; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1244,7 +1290,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL) || want_cookie) + if (tcp_v6_send_synack(sk, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); -- cgit v1.2.3-70-g09d2