From 30e502a34b8b21fae2c789da102bd9f6e99fef83 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Sep 2014 22:37:33 +0200 Subject: net: tcp: add flag for ca to indicate that ECN is required This patch adds a flag to TCP congestion algorithms that allows for requesting to mark IPv4/IPv6 sockets with transport as ECN capable, that is, ECT(0), when required by a congestion algorithm. It is currently used and needed in DataCenter TCP (DCTCP), as it requires both peers to assert ECT on all IP packets sent - it uses ECN feedback (i.e. CE, Congestion Encountered information) from switches inside the data center to derive feedback to the end hosts. Therefore, simply add a new flag to icsk_ca_ops. Note that DCTCP's algorithm/behaviour slightly diverges from RFC3168, therefore this is only (!) enabled iff the assigned congestion control ops module has requested this. By that, we can tightly couple this logic really only to the provided congestion control ops. Joint work with Florian Westphal and Glenn Judd. Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Glenn Judd Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5073eefa6fa..fb0fe97e1c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5944,7 +5944,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); + TCP_ECN_create_request(req, skb, sk); if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4d92703df4c..20e73271d75 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -318,11 +318,15 @@ static u16 tcp_select_window(struct sock *sk) } /* Packet ECN state for a SYN-ACK */ -static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_send_synack(struct sock *sk, struct sk_buff *skb) { + const struct tcp_sock *tp = tcp_sk(sk); + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; + else if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); } /* Packet ECN state for a SYN. */ @@ -331,17 +335,24 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = 0; - if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { + if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + tcp_ca_needs_ecn(sk)) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; + if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); } } static __inline__ void -TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) +TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th, + struct sock *sk) { - if (inet_rsk(req)->ecn_ok) + if (inet_rsk(req)->ecn_ok) { th->ece = 1; + if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); + } } /* Set up ECN state for a packet on a ESTABLISHED socket that is about to @@ -362,7 +373,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, tcp_hdr(skb)->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else { + } else if (!tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } @@ -2789,7 +2800,7 @@ int tcp_send_synack(struct sock *sk) } TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; - TCP_ECN_send_synack(tcp_sk(sk), skb); + TCP_ECN_send_synack(sk, skb); } return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } @@ -2848,7 +2859,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - TCP_ECN_make_synack(req, th); + TCP_ECN_make_synack(req, th, sk); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; /* Setting of flags are superfluous here for callers (and ECE is -- cgit v1.2.3-70-g09d2