diff options
Diffstat (limited to 'net/ipv4')
34 files changed, 273 insertions, 305 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 255aa9946fe..23104a3f292 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -243,7 +243,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { - int ret, no_addr, accept_local; + int ret, no_addr; struct fib_result res; struct flowi4 fl4; struct net *net; @@ -258,16 +258,17 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, no_addr = idev->ifa_list == NULL; - accept_local = IN_DEV_ACCEPT_LOCAL(idev); fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); if (fib_lookup(net, &fl4, &res)) goto last_resort; - if (res.type != RTN_UNICAST) { - if (res.type != RTN_LOCAL || !accept_local) - goto e_inval; - } + if (res.type != RTN_UNICAST && + (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) + goto e_inval; + if (!rpf && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) + goto last_resort; fib_combine_itag(itag, &res); dev_match = false; @@ -321,6 +322,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); if (!r && !fib_num_tclassid_users(dev_net(dev)) && + IN_DEV_ACCEPT_LOCAL(idev) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b10cd43a472..5b6efb3d230 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -157,9 +157,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) static void free_nh_exceptions(struct fib_nh *nh) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash; int i; + hash = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!hash) + return; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; @@ -205,8 +208,7 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); - if (nexthop_nh->nh_exceptions) - free_nh_exceptions(nexthop_nh); + free_nh_exceptions(nexthop_nh); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 0485bf7f8f0..7e0756da873 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,6 +125,10 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } + + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + options++; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6556263c8fa..d3fe2ac0516 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -119,28 +119,6 @@ out: return segs; } -/* Compute the whole skb csum in s/w and store it, then verify GRO csum - * starting from gro_offset. - */ -static __sum16 gro_skb_checksum(struct sk_buff *skb) -{ - __sum16 sum; - - skb->csum = skb_checksum(skb, 0, skb->len, 0); - NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, - csum_partial(skb->data, skb_gro_offset(skb), 0)); - sum = csum_fold(NAPI_GRO_CB(skb)->csum); - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); - } else { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - } - - return sum; -} - static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -192,22 +170,16 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if (unlikely(!greh)) goto out_unlock; } - if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ - __sum16 csum = 0; - - if (skb->ip_summed == CHECKSUM_COMPLETE) - csum = csum_fold(NAPI_GRO_CB(skb)->csum); - /* Don't trust csum error calculated/reported by h/w */ - if (skb->ip_summed == CHECKSUM_NONE || csum != 0) - csum = gro_skb_checksum(skb); - - /* GRE CSUM is the 1's complement of the 1's complement sum - * of the GRE hdr plus payload so it should add up to 0xffff - * (and 0 after csum_fold()) just like the IPv4 hdr csum. - */ - if (csum) + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) { + if (skb_gro_checksum_simple_validate(skb)) goto out_unlock; + + skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); } + flush = 0; for (p = *head; p; p = p->next) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f10eab46228..4146153d875 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -117,7 +117,7 @@ #define IGMP_V2_Unsolicited_Report_Interval (10*HZ) #define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) -#define IGMP_Unsolicited_Report_Count 2 +#define IGMP_Query_Robustness_Variable 2 #define IGMP_Initial_Report_Delay (1) @@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->unsolicit_count = sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; + in_dev->mr_qrv = sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST + in_dev->mr_qrv = sysctl_igmp_qrv; +#endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) @@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) */ int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; - +#ifdef CONFIG_IP_MULTICAST +int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable; +#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2539,7 +2538,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (rcu_dereference(state->in_dev->mc_list) == im) { + if (rcu_access_pointer(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 43116e8c8e1..9111a4e2215 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -229,7 +229,7 @@ begin: } } else if (score == hiscore && reuseport) { matches++; - if (((u64)phash * matches) >> 32 == 0) + if (reciprocal_scale(phash, matches) == 0) result = sk; phash = next_pseudo_random32(phash); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5cb830c7899..455e75bcb16 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -405,7 +405,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; + struct sk_buff *skb; DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct { struct sock_extended_err ee; @@ -415,7 +415,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int copied; err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; @@ -462,17 +462,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - skb2 = skb_peek(&sk->sk_error_queue); - if (skb2 != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else - spin_unlock_bh(&sk->sk_error_queue.lock); - out_free_skb: kfree_skb(skb); out: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 5bbef4fdcb4..648fa1490ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -262,7 +262,8 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); - while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + while (time_before(jiffies, start + + msecs_to_jiffies(CONF_CARRIER_TIMEOUT))) { int wait, elapsed; for_each_netdev(&init_net, dev) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2510c02c2d2..e90f83a3415 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -285,7 +285,7 @@ clusterip_hashfn(const struct sk_buff *skb, } /* node numbers are 1..n, not 0..n */ - return (((u64)hashval * config->num_total_nodes) >> 32) + 1; + return reciprocal_scale(hashval, config->num_total_nodes) + 1; } static inline int diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b000c7b..234a43e233d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -596,12 +596,12 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) static inline u32 fnhe_hashfun(__be32 daddr) { + static u32 fnhe_hashrnd __read_mostly; u32 hval; - hval = (__force u32) daddr; - hval ^= (hval >> 11) ^ (hval >> 22); - - return hval & (FNHE_HASH_SIZE - 1); + net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); + hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); + return hash_32(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) @@ -628,12 +628,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, spin_lock_bh(&fnhe_lock); - hash = nh->nh_exceptions; + hash = rcu_dereference(nh->nh_exceptions); if (!hash) { hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - nh->nh_exceptions = hash; + rcu_assign_pointer(nh->nh_exceptions, hash); } hash += hval; @@ -1242,7 +1242,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); struct fib_nh_exception *fnhe; u32 hval; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c0c75688896..0431a8f3c8f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -25,7 +25,7 @@ extern int sysctl_tcp_syncookies; -static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; +static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c5255..45d156dacd6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index d5de69bc04f..bb395d46a38 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <net/tcp.h> - #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ @@ -46,11 +45,10 @@ MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(smooth_part, int, 0644); MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wmax-B to Wmax"); - /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ - u32 last_max_cwnd; /* last maximum snd_cwnd */ + u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ @@ -103,7 +101,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* binary increase */ if (cwnd < ca->last_max_cwnd) { - __u32 dist = (ca->last_max_cwnd - cwnd) + __u32 dist = (ca->last_max_cwnd - cwnd) / BICTCP_B; if (dist > max_increment) @@ -154,7 +152,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); } - } /* @@ -177,7 +174,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) ca->loss_cwnd = tp->snd_cwnd; - if (tp->snd_cwnd <= low_window) return max(tp->snd_cwnd >> 1U, 2U); else @@ -188,6 +184,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct bictcp *ca = inet_csk_ca(sk); + return max(tp->snd_cwnd, ca->loss_cwnd); } @@ -206,12 +203,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); + cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } } - static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7b09d8b49fa..80248f56c89 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -142,7 +142,6 @@ static int __init tcp_congestion_default(void) } late_initcall(tcp_congestion_default); - /* Build string with list of available congestion control values */ void tcp_get_available_congestion_control(char *buf, size_t maxlen) { @@ -154,7 +153,6 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen) offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); - } rcu_read_unlock(); } @@ -186,7 +184,6 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); - } rcu_read_unlock(); } @@ -230,7 +227,6 @@ out: return ret; } - /* Change congestion control for socket */ int tcp_set_congestion_control(struct sock *sk, const char *name) { @@ -337,6 +333,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9bd8a4828a..20de0118c98 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -82,12 +82,13 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ - u32 last_max_cwnd; /* last maximum snd_cwnd */ + u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ - u32 bic_K; /* time to origin point from the beginning of the current epoch */ + u32 bic_K; /* time to origin point + from the beginning of the current epoch */ u32 delay_min; /* min delay (msec << 3) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ @@ -219,7 +220,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->last_time = tcp_time_stamp; if (ca->epoch_start == 0) { - ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ + ca->epoch_start = tcp_time_stamp; /* record beginning */ ca->ack_cnt = 1; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ @@ -263,9 +264,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* c/rtt * (t-K)^3 */ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); - if (t < ca->bic_K) /* below origin*/ + if (t < ca->bic_K) /* below origin*/ bic_target = ca->bic_origin_point - delta; - else /* above origin*/ + else /* above origin*/ bic_target = ca->bic_origin_point + delta; /* cubic function - calc bictcp_cnt*/ @@ -285,13 +286,14 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; + delta = (cwnd * scale) >> 3; while (ca->ack_cnt > delta) { /* update tcp cwnd */ ca->ack_cnt -= delta; ca->tcp_cwnd++; } - if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ delta = ca->tcp_cwnd - cwnd; max_cnt = cwnd / delta; if (ca->cnt > max_cnt) @@ -320,7 +322,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); } - } static u32 bictcp_recalc_ssthresh(struct sock *sk) @@ -452,7 +453,8 @@ static int __init cubictcp_register(void) * based on SRTT of 100ms */ - beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta); + beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 + / (BICTCP_BETA_SCALE - beta); cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index ed3f2ad42e0..0d73f9ddb55 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ - #include <linux/module.h> #include <linux/inet_diag.h> @@ -35,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 1c4908280d9..882c08aae2f 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <net/tcp.h> - /* From AIMD tables from RFC 3649 appendix B, * with fixed-point MD scaled <<8. */ @@ -17,78 +16,78 @@ static const struct hstcp_aimd_val { unsigned int cwnd; unsigned int md; } hstcp_aimd_vals[] = { - { 38, 128, /* 0.50 */ }, - { 118, 112, /* 0.44 */ }, - { 221, 104, /* 0.41 */ }, - { 347, 98, /* 0.38 */ }, - { 495, 93, /* 0.37 */ }, - { 663, 89, /* 0.35 */ }, - { 851, 86, /* 0.34 */ }, - { 1058, 83, /* 0.33 */ }, - { 1284, 81, /* 0.32 */ }, - { 1529, 78, /* 0.31 */ }, - { 1793, 76, /* 0.30 */ }, - { 2076, 74, /* 0.29 */ }, - { 2378, 72, /* 0.28 */ }, - { 2699, 71, /* 0.28 */ }, - { 3039, 69, /* 0.27 */ }, - { 3399, 68, /* 0.27 */ }, - { 3778, 66, /* 0.26 */ }, - { 4177, 65, /* 0.26 */ }, - { 4596, 64, /* 0.25 */ }, - { 5036, 62, /* 0.25 */ }, - { 5497, 61, /* 0.24 */ }, - { 5979, 60, /* 0.24 */ }, - { 6483, 59, /* 0.23 */ }, - { 7009, 58, /* 0.23 */ }, - { 7558, 57, /* 0.22 */ }, - { 8130, 56, /* 0.22 */ }, - { 8726, 55, /* 0.22 */ }, - { 9346, 54, /* 0.21 */ }, - { 9991, 53, /* 0.21 */ }, - { 10661, 52, /* 0.21 */ }, - { 11358, 52, /* 0.20 */ }, - { 12082, 51, /* 0.20 */ }, - { 12834, 50, /* 0.20 */ }, - { 13614, 49, /* 0.19 */ }, - { 14424, 48, /* 0.19 */ }, - { 15265, 48, /* 0.19 */ }, - { 16137, 47, /* 0.19 */ }, - { 17042, 46, /* 0.18 */ }, - { 17981, 45, /* 0.18 */ }, - { 18955, 45, /* 0.18 */ }, - { 19965, 44, /* 0.17 */ }, - { 21013, 43, /* 0.17 */ }, - { 22101, 43, /* 0.17 */ }, - { 23230, 42, /* 0.17 */ }, - { 24402, 41, /* 0.16 */ }, - { 25618, 41, /* 0.16 */ }, - { 26881, 40, /* 0.16 */ }, - { 28193, 39, /* 0.16 */ }, - { 29557, 39, /* 0.15 */ }, - { 30975, 38, /* 0.15 */ }, - { 32450, 38, /* 0.15 */ }, - { 33986, 37, /* 0.15 */ }, - { 35586, 36, /* 0.14 */ }, - { 37253, 36, /* 0.14 */ }, - { 38992, 35, /* 0.14 */ }, - { 40808, 35, /* 0.14 */ }, - { 42707, 34, /* 0.13 */ }, - { 44694, 33, /* 0.13 */ }, - { 46776, 33, /* 0.13 */ }, - { 48961, 32, /* 0.13 */ }, - { 51258, 32, /* 0.13 */ }, - { 53677, 31, /* 0.12 */ }, - { 56230, 30, /* 0.12 */ }, - { 58932, 30, /* 0.12 */ }, - { 61799, 29, /* 0.12 */ }, - { 64851, 28, /* 0.11 */ }, - { 68113, 28, /* 0.11 */ }, - { 71617, 27, /* 0.11 */ }, - { 75401, 26, /* 0.10 */ }, - { 79517, 26, /* 0.10 */ }, - { 84035, 25, /* 0.10 */ }, - { 89053, 24, /* 0.10 */ }, + { 38, 128, /* 0.50 */ }, + { 118, 112, /* 0.44 */ }, + { 221, 104, /* 0.41 */ }, + { 347, 98, /* 0.38 */ }, + { 495, 93, /* 0.37 */ }, + { 663, 89, /* 0.35 */ }, + { 851, 86, /* 0.34 */ }, + { 1058, 83, /* 0.33 */ }, + { 1284, 81, /* 0.32 */ }, + { 1529, 78, /* 0.31 */ }, + { 1793, 76, /* 0.30 */ }, + { 2076, 74, /* 0.29 */ }, + { 2378, 72, /* 0.28 */ }, + { 2699, 71, /* 0.28 */ }, + { 3039, 69, /* 0.27 */ }, + { 3399, 68, /* 0.27 */ }, + { 3778, 66, /* 0.26 */ }, + { 4177, 65, /* 0.26 */ }, + { 4596, 64, /* 0.25 */ }, + { 5036, 62, /* 0.25 */ }, + { 5497, 61, /* 0.24 */ }, + { 5979, 60, /* 0.24 */ }, + { 6483, 59, /* 0.23 */ }, + { 7009, 58, /* 0.23 */ }, + { 7558, 57, /* 0.22 */ }, + { 8130, 56, /* 0.22 */ }, + { 8726, 55, /* 0.22 */ }, + { 9346, 54, /* 0.21 */ }, + { 9991, 53, /* 0.21 */ }, + { 10661, 52, /* 0.21 */ }, + { 11358, 52, /* 0.20 */ }, + { 12082, 51, /* 0.20 */ }, + { 12834, 50, /* 0.20 */ }, + { 13614, 49, /* 0.19 */ }, + { 14424, 48, /* 0.19 */ }, + { 15265, 48, /* 0.19 */ }, + { 16137, 47, /* 0.19 */ }, + { 17042, 46, /* 0.18 */ }, + { 17981, 45, /* 0.18 */ }, + { 18955, 45, /* 0.18 */ }, + { 19965, 44, /* 0.17 */ }, + { 21013, 43, /* 0.17 */ }, + { 22101, 43, /* 0.17 */ }, + { 23230, 42, /* 0.17 */ }, + { 24402, 41, /* 0.16 */ }, + { 25618, 41, /* 0.16 */ }, + { 26881, 40, /* 0.16 */ }, + { 28193, 39, /* 0.16 */ }, + { 29557, 39, /* 0.15 */ }, + { 30975, 38, /* 0.15 */ }, + { 32450, 38, /* 0.15 */ }, + { 33986, 37, /* 0.15 */ }, + { 35586, 36, /* 0.14 */ }, + { 37253, 36, /* 0.14 */ }, + { 38992, 35, /* 0.14 */ }, + { 40808, 35, /* 0.14 */ }, + { 42707, 34, /* 0.13 */ }, + { 44694, 33, /* 0.13 */ }, + { 46776, 33, /* 0.13 */ }, + { 48961, 32, /* 0.13 */ }, + { 51258, 32, /* 0.13 */ }, + { 53677, 31, /* 0.12 */ }, + { 56230, 30, /* 0.12 */ }, + { 58932, 30, /* 0.12 */ }, + { 61799, 29, /* 0.12 */ }, + { 64851, 28, /* 0.11 */ }, + { 68113, 28, /* 0.11 */ }, + { 71617, 27, /* 0.11 */ }, + { 75401, 26, /* 0.10 */ }, + { 79517, 26, /* 0.10 */ }, + { 84035, 25, /* 0.10 */ }, + { 89053, 24, /* 0.10 */ }, }; #define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 031361311a8..58469fff6c1 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -98,7 +98,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt) +static void measure_achieved_throughput(struct sock *sk, + u32 pkts_acked, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); @@ -148,8 +149,8 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT) if (use_bandwidth_switch) { u32 maxB = ca->maxB; u32 old_maxB = ca->old_maxB; - ca->old_maxB = ca->maxB; + ca->old_maxB = ca->maxB; if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) { ca->beta = BETA_MIN; ca->modeswitch = 0; @@ -270,6 +271,7 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); + if (ca->undo_last_cong) { ca->last_cong = jiffies; ca->undo_last_cong = 0; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index d8f8f05a495..f963b274f2b 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -29,7 +29,6 @@ static int rtt0 = 25; module_param(rtt0, int, 0644); MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); - /* This is called to refresh values for hybla parameters */ static inline void hybla_recalc_param (struct sock *sk) { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 5999b3972e6..1d5a30a90ad 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -284,7 +284,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; if (delta >= tp->snd_cwnd) { tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, - (u32) tp->snd_cwnd_clamp); + (u32)tp->snd_cwnd_clamp); tp->snd_cwnd_cnt = 0; } } @@ -299,7 +299,6 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); } - /* Extract info for Tcp socket info provided via netlink. */ static void tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a906e0200ff..f97003ad0af 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static void tcp_clear_retrans_partial(struct tcp_sock *tp) +void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; - tp->undo_marker = 0; tp->undo_retrans = -1; + tp->fackets_out = 0; + tp->sacked_out = 0; } -void tcp_clear_retrans(struct tcp_sock *tp) +static inline void tcp_init_undo(struct tcp_sock *tp) { - tcp_clear_retrans_partial(tp); - - tp->fackets_out = 0; - tp->sacked_out = 0; + tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ + tp->undo_retrans = tp->retrans_out ? : -1; } /* Enter Loss state. If we detect SACK reneging, forget all SACK information @@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); + tcp_init_undo(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_clear_retrans_partial(tp); + tp->retrans_out = 0; + tp->lost_out = 0; if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { @@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out ? : -1; + tcp_init_undo(tp); if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) @@ -2971,7 +2967,8 @@ void tcp_rearm_rto(struct sock *sk) if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); - const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; + const u32 rto_time_stamp = + tcp_skb_timestamp(skb) + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. @@ -5910,7 +5907,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct request_sock *req; struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; - __u32 isn = TCP_SKB_CB(skb)->when; + __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; bool want_cookie = false, fastopen; struct flowi fl; struct tcp_fastopen_cookie foc = { .len = -1 }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd17f009aed..3f9bc3f0bba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,7 +90,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; EXPORT_SYMBOL(sysctl_tcp_low_latency); - #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); @@ -438,8 +437,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) skb = tcp_write_queue_head(sk); BUG_ON(!skb); - remaining = icsk->icsk_rto - min(icsk->icsk_rto, - tcp_time_stamp - TCP_SKB_CB(skb)->when); + remaining = icsk->icsk_rto - + min(icsk->icsk_rto, + tcp_time_stamp - tcp_skb_timestamp(skb)); if (remaining) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -1269,7 +1269,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { @@ -1628,7 +1628,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; @@ -2183,7 +2183,7 @@ int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; - s->last_pos = 0; + s->last_pos = 0; return 0; } EXPORT_SYMBOL(tcp_seq_open); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1649988bd1b..a058f411d3a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -232,7 +232,7 @@ kill: u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; - TCP_SKB_CB(skb)->when = isn; + TCP_SKB_CB(skb)->tcp_tw_isn = isn; return TCP_TW_SYN; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc1b83cb830..72912533a19 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -288,35 +288,14 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - /* Use the IP hdr immediately proceeding for this transport */ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - 0); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + inet_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5a7c41fbc6d..7f1280dcad5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -550,7 +550,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; + opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -618,7 +618,7 @@ static unsigned int tcp_synack_options(struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = tcp_skb_timestamp(skb); opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -647,7 +647,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { - struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; unsigned int eff_sacks; @@ -666,7 +665,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; + opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -886,8 +885,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb = skb_clone(skb, gfp_mask); if (unlikely(!skb)) return -ENOBUFS; - /* Our usage of tstamp should remain private */ - skb->tstamp.tv64 = 0; } inet = inet_sk(sk); @@ -975,7 +972,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + /* Our usage of tstamp should remain private */ + skb->tstamp.tv64 = 0; err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); + if (likely(err <= 0)) return err; @@ -1146,10 +1146,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, buff->ip_summed = skb->ip_summed; - /* Looks stupid, but our code really uses when of - * skbs, which it never sent before. --ANK - */ - TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; buff->tstamp = skb->tstamp; tcp_fragment_tstamp(skb, buff); @@ -1874,8 +1870,8 @@ static int tcp_mtu_probe(struct sock *sk) tcp_init_tso_segs(sk, nskb, nskb->len); /* We're ready to send. If this fails, the probe will - * be resegmented into mss-sized pieces by tcp_write_xmit(). */ - TCP_SKB_CB(nskb)->when = tcp_time_stamp; + * be resegmented into mss-sized pieces by tcp_write_xmit(). + */ if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending * effectively two packets. */ @@ -1935,8 +1931,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, BUG_ON(!tso_segs); if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { - /* "when" is used as a start point for the retransmit timer */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; + /* "skb_mstamp" is used as a start point for the retransmit timer */ + skb_mstamp_get(&skb->skb_mstamp); goto repair; /* Skip network transmission */ } @@ -2000,8 +1996,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2499,7 +2493,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Make a copy, if the first transmission SKB clone we made * is still in somebody's hands, else make a clone. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom @@ -2544,7 +2537,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) - tp->retrans_stamp = TCP_SKB_CB(skb)->when; + tp->retrans_stamp = tcp_skb_timestamp(skb); /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). @@ -2752,7 +2745,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); /* Send it off. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); @@ -2791,7 +2783,6 @@ int tcp_send_synack(struct sock *sk) TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } - TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } @@ -2835,10 +2826,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) - TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); + skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); else #endif - TCP_SKB_CB(skb)->when = tcp_time_stamp; + skb_mstamp_get(&skb->skb_mstamp); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, foc) + sizeof(*th); @@ -3086,7 +3077,7 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); - tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; + tp->retrans_stamp = tcp_time_stamp; tcp_connect_queue_skb(sk, buff); TCP_ECN_send_syn(sk, buff); @@ -3194,7 +3185,7 @@ void tcp_send_ack(struct sock *sk) tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); /* Send it off, this clears delayed acks for us. */ - TCP_SKB_CB(buff)->when = tcp_time_stamp; + skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -3226,7 +3217,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) * send it. */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); - TCP_SKB_CB(skb)->when = tcp_time_stamp; + skb_mstamp_get(&skb->skb_mstamp); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3270,7 +3261,6 @@ int tcp_write_wakeup(struct sock *sk) tcp_set_skb_tso_segs(sk, skb, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) tcp_event_new_data_sent(sk, skb); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 3b66610d415..ebf5ff57526 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -83,7 +83,6 @@ static struct { struct tcp_log *log; } tcp_probe; - static inline int tcp_probe_used(void) { return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); @@ -101,7 +100,6 @@ static inline int tcp_probe_avail(void) si4.sin_addr.s_addr = inet->inet_##mem##addr; \ } while (0) \ - /* * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! @@ -194,8 +192,8 @@ static int tcpprobe_sprint(char *tbuf, int n) return scnprintf(tbuf, n, "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", - (unsigned long) tv.tv_sec, - (unsigned long) tv.tv_nsec, + (unsigned long)tv.tv_sec, + (unsigned long)tv.tv_nsec, &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 8250949b885..6824afb65d9 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -31,10 +31,10 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } - static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index df90cd1ce37..a339e7ba05a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -135,10 +135,9 @@ static bool retransmits_timed_out(struct sock *sk, if (!inet_csk(sk)->icsk_retransmits) return false; - if (unlikely(!tcp_sk(sk)->retrans_stamp)) - start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; - else - start_ts = tcp_sk(sk)->retrans_stamp; + start_ts = tcp_sk(sk)->retrans_stamp; + if (unlikely(!start_ts)) + start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); if (likely(timeout == 0)) { linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index b40ad897f94..a6afde666ab 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -51,7 +51,6 @@ MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); - /* There are several situations when we must "re-start" Vegas: * * o when a connection is established @@ -133,7 +132,6 @@ EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); void tcp_vegas_state(struct sock *sk, u8 ca_state) { - if (ca_state == TCP_CA_Open) vegas_enable(sk); else @@ -285,7 +283,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* Use normal slow start */ else if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp, acked); - } /* Extract info for Tcp socket info provided via netlink. */ diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 8276977d2c8..a4d2d2d88dc 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -175,7 +175,6 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) } else tp->snd_cwnd_cnt++; } - } if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index b94a04ae2ed..81911a92356 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -42,7 +42,6 @@ struct westwood { u8 reset_rtt_min; /* Reset RTT min to next RTT sample*/ }; - /* TCP Westwood functions and constants */ #define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ #define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ @@ -153,7 +152,6 @@ static inline void update_rtt_min(struct westwood *w) w->rtt_min = min(w->rtt, w->rtt_min); } - /* * @westwood_fast_bw * It is called when we are in fast path. In particular it is called when @@ -208,7 +206,6 @@ static inline u32 westwood_acked_count(struct sock *sk) return w->cumul_ack; } - /* * TCP Westwood * Here limit is evaluated as Bw estimation*RTTmin (for obtaining it @@ -219,6 +216,7 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct westwood *w = inet_csk_ca(sk); + return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } @@ -254,12 +252,12 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) } } - /* Extract info for Tcp socket info provided via netlink. */ static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct tcpvegas_info info = { .tcpv_enabled = 1, @@ -271,7 +269,6 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, } } - static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 599b79b8eac..cd727321859 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -54,10 +54,8 @@ static void tcp_yeah_init(struct sock *sk) /* Ensure the MD arithmetic works. This is somewhat pedantic, * since I don't think we will see a cwnd this large. :) */ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); - } - static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -84,7 +82,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* Scalable */ tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ + if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; tp->snd_cwnd_cnt = 0; @@ -120,7 +118,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ if (after(ack, yeah->vegas.beg_snd_nxt)) { - /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got * at least one RTT sample that wasn't from a delayed ACK. @@ -189,7 +186,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) } yeah->lastQ = queue; - } /* Save the extent of the current window so we can use this @@ -205,7 +201,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) } } -static u32 tcp_yeah_ssthresh(struct sock *sk) { +static u32 tcp_yeah_ssthresh(struct sock *sk) +{ const struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); u32 reduction; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f57c0e4c232..cd0db5471bb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -99,6 +99,7 @@ #include <linux/slab.h> #include <net/tcp_states.h> #include <linux/skbuff.h> +#include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/net_namespace.h> @@ -224,7 +225,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, remaining = (high - low) + 1; rand = prandom_u32(); - first = (((u64)rand * remaining) >> 32) + low; + first = reciprocal_scale(rand, remaining) + low; /* * force rand to be an odd multiple of UDP_HTABLE_SIZE */ @@ -448,7 +449,7 @@ begin: } } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -529,7 +530,7 @@ begin: } } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -1787,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); @@ -1967,7 +1972,7 @@ void udp_v4_early_demux(struct sk_buff *skb) return; skb->sk = sk; - skb->destructor = sock_edemux; + skb->destructor = sock_efree; dst = sk->sk_rx_dst; if (dst) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59035bc3008..84e0e05c9c0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -228,30 +228,24 @@ unlock: } EXPORT_SYMBOL(udp_del_offload); -static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, + struct udphdr *uh) { struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; - struct udphdr *uh, *uh2; - unsigned int hlen, off; + struct udphdr *uh2; + unsigned int off = skb_gro_offset(skb); int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)) + (skb->ip_summed != CHECKSUM_PARTIAL && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - uh = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!uh)) - goto out; - } - rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { @@ -269,7 +263,12 @@ unflush: continue; uh2 = (struct udphdr *)(p->data + off); - if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + + /* Match ports and either checksums are either both zero + * or nonzero. + */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || + (!uh->check ^ !uh2->check)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -286,7 +285,33 @@ out: return pp; } -static int udp_gro_complete(struct sk_buff *skb, int nhoff) +static struct sk_buff **udp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + if (unlikely(!uh)) + goto flush; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo); +skip: + return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +int udp_gro_complete(struct sk_buff *skb, int nhoff) { struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); @@ -311,12 +336,24 @@ static int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +int udp4_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, + iph->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv4_offload = { .callbacks = { .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, - .gro_receive = udp_gro_receive, - .gro_complete = udp_gro_complete, + .gro_receive = udp4_gro_receive, + .gro_complete = udp4_gro_complete, }, }; |