summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-25 13:25:22 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-25 13:25:22 +0200
commit8a9ea3237e7eb5c25f09e429ad242ae5a3d5ea22 (patch)
treea0a63398a9983667d52cbbbf4e2405b4f22b1d83 /net/ipv4
parent1be025d3cb40cd295123af2c394f7229ef9b30ca (diff)
parent8b3408f8ee994973869d8ba32c5bf482bc4ddca4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1745 commits) dp83640: free packet queues on remove dp83640: use proper function to free transmit time stamping packets ipv6: Do not use routes from locally generated RAs |PATCH net-next] tg3: add tx_dropped counter be2net: don't create multiple RX/TX rings in multi channel mode be2net: don't create multiple TXQs in BE2 be2net: refactor VF setup/teardown code into be_vf_setup/clear() be2net: add vlan/rx-mode/flow-control config to be_setup() net_sched: cls_flow: use skb_header_pointer() ipv4: avoid useless call of the function check_peer_pmtu TCP: remove TCP_DEBUG net: Fix driver name for mdio-gpio.c ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT rtnetlink: Add missing manual netlink notification in dev_change_net_namespaces ipv4: fix ipsec forward performance regression jme: fix irq storm after suspend/resume route: fix ICMP redirect validation net: hold sock reference while processing tx timestamps tcp: md5: add more const attributes Add ethtool -g support to virtio_net ... Fix up conflicts in: - drivers/net/Kconfig: The split-up generated a trivial conflict with removal of a stale reference to Documentation/networking/net-modules.txt. Remove it from the new location instead. - fs/sysfs/dir.c: Fairly nasty conflicts with the sysfs rb-tree usage, conflicting with Eric Biederman's changes for tagged directories.
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/fib_trie.c12
-rw-r--r--net/ipv4/gre.c4
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/igmp.c12
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_lro.c10
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/ip_fragment.c40
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/ipip.c10
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c24
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c36
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c4
-rw-r--r--net/ipv4/route.c53
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c97
-rw-r--r--net/ipv4/tcp_input.c244
-rw-r--r--net/ipv4/tcp_ipv4.c57
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c152
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv4/xfrm4_policy.c14
36 files changed, 517 insertions, 392 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd2b9478ddd..1b5096a9875 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -893,7 +893,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
EXPORT_SYMBOL(inet_ioctl);
#ifdef CONFIG_COMPAT
-int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
int err = -ENOIOCTLCMD;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c2a98e402e..86f3b885b4f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -476,7 +476,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def,
doi = doi_def->doi;
doi_type = doi_def->type;
- if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+ if (doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
goto doi_add_return;
for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) {
switch (doi_def->tags[iter]) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc19bd06dd0..c6b5092f29a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
ip_mc_up(in_dev);
/* we can receive as soon as ip_ptr is set -- do this last */
- rcu_assign_pointer(dev->ip_ptr, in_dev);
+ RCU_INIT_POINTER(dev->ip_ptr, in_dev);
out:
return in_dev;
out_kfree:
@@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev)
inet_free_ifa(ifa);
}
- rcu_assign_pointer(dev->ip_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip_ptr, NULL);
devinet_sysctl_unregister(in_dev);
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_REGISTER:
printk(KERN_DEBUG "inetdev_event: bug\n");
- rcu_assign_pointer(dev->ip_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip_ptr, NULL);
break;
case NETDEV_UP:
if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index de9e2978476..89d6f71a6a9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -204,7 +204,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
-/* Same as rcu_assign_pointer
+/* Same as RCU_INIT_POINTER
* but that macro() assumes that value is a pointer.
*/
static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
@@ -528,7 +528,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
if (n)
node_set_parent(n, tn);
- rcu_assign_pointer(tn->child[i], n);
+ RCU_INIT_POINTER(tn->child[i], n);
}
#define MAX_WORK 10
@@ -1014,7 +1014,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
tp = node_parent((struct rt_trie_node *) tn);
if (!tp)
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tnode_free_flush();
if (!tp)
@@ -1026,7 +1026,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
if (IS_TNODE(tn))
tn = (struct tnode *)resize(t, (struct tnode *)tn);
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tnode_free_flush();
}
@@ -1163,7 +1163,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
put_child(t, (struct tnode *)tp, cindex,
(struct rt_trie_node *)tn);
} else {
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tp = tn;
}
}
@@ -1621,7 +1621,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
put_child(t, (struct tnode *)tp, cindex, NULL);
trie_rebalance(t, tp);
} else
- rcu_assign_pointer(t->trie, NULL);
+ RCU_INIT_POINTER(t->trie, NULL);
free_leaf(l);
}
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index dbfc21de347..8cb1ebb7cd7 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version)
if (gre_proto[version])
goto err_out_unlock;
- rcu_assign_pointer(gre_proto[version], proto);
+ RCU_INIT_POINTER(gre_proto[version], proto);
spin_unlock(&gre_proto_lock);
return 0;
@@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
if (rcu_dereference_protected(gre_proto[version],
lockdep_is_held(&gre_proto_lock)) != proto)
goto err_out_unlock;
- rcu_assign_pointer(gre_proto[version], NULL);
+ RCU_INIT_POINTER(gre_proto[version], NULL);
spin_unlock(&gre_proto_lock);
synchronize_rcu();
return 0;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 23ef31baa1a..ab188ae12fd 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.icmp_sk[i] = sk;
/* Enough space for 2 64K ICMP packets, including
- * sk_buff struct overhead.
+ * sk_buff/skb_shared_info struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
/*
* Speedup sock_wfree()
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d577199eabd..c7472eff2d5 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
/* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG.
We will get multicast token leakage, when IFF_MULTICAST
- is changed. This check should be done in dev->set_multicast_list
+ is changed. This check should be done in ndo_set_rx_mode
routine. Something sort of:
if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; }
--ANK
@@ -1242,7 +1242,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
im->next_rcu = in_dev->mc_list;
in_dev->mc_count++;
- rcu_assign_pointer(in_dev->mc_list, im);
+ RCU_INIT_POINTER(in_dev->mc_list, im);
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1813,7 +1813,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
iml->sfmode = MCAST_EXCLUDE;
- rcu_assign_pointer(inet->mc_list, iml);
+ RCU_INIT_POINTER(inet->mc_list, iml);
ip_mc_inc_group(in_dev, addr);
err = 0;
done:
@@ -1835,7 +1835,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
}
err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
iml->sfmode, psf->sl_count, psf->sl_addr, 0);
- rcu_assign_pointer(iml->sflist, NULL);
+ RCU_INIT_POINTER(iml->sflist, NULL);
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psf, rcu);
@@ -2000,7 +2000,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
- rcu_assign_pointer(pmc->sflist, newpsl);
+ RCU_INIT_POINTER(pmc->sflist, newpsl);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2103,7 +2103,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
} else
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
- rcu_assign_pointer(pmc->sflist, newpsl);
+ RCU_INIT_POINTER(pmc->sflist, newpsl);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 389a2e6a17f..f5e2bdaef94 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,6 +108,9 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops->name);
}
+ if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6))
+ RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
+
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
@@ -130,6 +133,8 @@ static int inet_csk_diag_fill(struct sock *sk,
&np->rcv_saddr);
ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
&np->daddr);
+ if (ext & (1 << (INET_DIAG_TOS - 1)))
+ RTA_PUT_U8(skb, INET_DIAG_TOS, np->tclass);
}
#endif
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index ef7ae6049a5..cc280a3f4f9 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -244,11 +244,11 @@ static void lro_add_frags(struct net_lro_desc *lro_desc,
skb->truesize += truesize;
skb_frags[0].page_offset += hlen;
- skb_frags[0].size -= hlen;
+ skb_frag_size_sub(&skb_frags[0], hlen);
while (tcp_data_len > 0) {
*(lro_desc->next_frag) = *skb_frags;
- tcp_data_len -= skb_frags->size;
+ tcp_data_len -= skb_frag_size(skb_frags);
lro_desc->next_frag++;
skb_frags++;
skb_shinfo(skb)->nr_frags++;
@@ -400,14 +400,14 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
skb_frags = skb_shinfo(skb)->frags;
while (data_len > 0) {
*skb_frags = *frags;
- data_len -= frags->size;
+ data_len -= skb_frag_size(frags);
skb_frags++;
frags++;
skb_shinfo(skb)->nr_frags++;
}
skb_shinfo(skb)->frags[0].page_offset += hdr_len;
- skb_shinfo(skb)->frags[0].size -= hdr_len;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
skb->ip_summed = ip_summed;
skb->csum = sum;
@@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
if (!lro_mgr->get_frag_header ||
lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
(void *)&tcph, &flags, priv)) {
- mac_hdr = page_address(frags->page) + frags->page_offset;
+ mac_hdr = skb_frag_address(frags);
goto out1;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 3c8dfa16614..44d65d546e3 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
tw->tw_bound_dev_if = sk->sk_bound_dev_if;
+ tw->tw_tos = inet->tos;
tw->tw_num = inet->inet_num;
tw->tw_state = TCP_TIME_WAIT;
tw->tw_substate = state;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0e0ab98abc6..fdaabf2f2b6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -599,8 +599,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
- for (i=0; i<skb_shinfo(head)->nr_frags; i++)
- plen += skb_shinfo(head)->frags[i].size;
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
@@ -682,6 +682,42 @@ int ip_defrag(struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_defrag);
+struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
+{
+ const struct iphdr *iph;
+ u32 len;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return skb;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return skb;
+
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ return skb;
+ if (!pskb_may_pull(skb, iph->ihl*4))
+ return skb;
+ iph = ip_hdr(skb);
+ len = ntohs(iph->tot_len);
+ if (skb->len < len || len < (iph->ihl * 4))
+ return skb;
+
+ if (ip_is_fragment(ip_hdr(skb))) {
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (skb) {
+ if (pskb_trim_rcsum(skb, len))
+ return skb;
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ if (ip_defrag(skb, user))
+ return NULL;
+ skb->rxhash = 0;
+ }
+ }
+ return skb;
+}
+EXPORT_SYMBOL(ip_check_defrag);
+
#ifdef CONFIG_SYSCTL
static int zero;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d7bb94c4834..d55110e9312 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -835,8 +835,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8c6563361ab..0bc95f3977d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -989,13 +989,13 @@ alloc_new_skb:
if (page && (left = PAGE_SIZE - off) > 0) {
if (copy >= left)
copy = left;
- if (page != frag->page) {
+ if (page != skb_frag_page(frag)) {
if (i == MAX_SKB_FRAGS) {
err = -EMSGSIZE;
goto error;
}
- get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
+ skb_frag_ref(skb, i);
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
@@ -1015,12 +1015,13 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+ if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag),
+ offset, copy, skb->len, skb) < 0) {
err = -EFAULT;
goto error;
}
cork->off += copy;
- frag->size += copy;
+ skb_frag_size_add(frag, copy);
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
@@ -1229,7 +1230,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (len > size)
len = size;
if (skb_can_coalesce(skb, i, page, offset)) {
- skb_shinfo(skb)->frags[i-1].size += len;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
@@ -1465,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* structure to pass arguments.
*/
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
- struct ip_reply_arg *arg, unsigned int len)
+ const struct ip_reply_arg *arg, unsigned int len)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_options_data replyopts;
@@ -1488,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
}
flowi4_init_output(&fl4, arg->bound_dev_if, 0,
- RT_TOS(ip_hdr(skb)->tos),
+ RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, sk->sk_protocol,
ip_reply_arg_flowi_flags(arg),
daddr, rt->rt_spec_dst,
@@ -1505,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
with locally disabled BH and that sk cannot be already spinlocked.
*/
bh_lock_sock(sk);
- inet->tos = ip_hdr(skb)->tos;
+ inet->tos = arg->tos;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8905e92f896..09ff51bf16a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -33,6 +33,7 @@
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
+#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
@@ -578,8 +579,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
case IP_TOS: /* This sets both TOS and Precedence */
if (sk->sk_type == SOCK_STREAM) {
- val &= ~3;
- val |= inet->tos & 3;
+ val &= ~INET_ECN_MASK;
+ val |= inet->tos & INET_ECN_MASK;
}
if (inet->tos != val) {
inet->tos = val;
@@ -961,7 +962,7 @@ mc_msf_out:
break;
case IP_TRANSPARENT:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) {
err = -EPERM;
break;
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 378b20b7ca6..065effd8349 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
+ RCU_INIT_POINTER(*tp, t->next);
break;
}
}
@@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
+ RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
+ RCU_INIT_POINTER(*tp, t);
}
static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -301,7 +301,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
struct ipip_net *ipn = net_generic(net, ipip_net_id);
if (dev == ipn->fb_tunnel_dev)
- rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
+ RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
else
ipip_tunnel_unlink(ipn, netdev_priv(dev));
dev_put(dev);
@@ -791,7 +791,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
return -ENOMEM;
dev_hold(dev);
- rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
+ RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
return 0;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 58e87915797..6164e982e0e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1176,7 +1176,7 @@ static void mrtsock_destruct(struct sock *sk)
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
- rcu_assign_pointer(mrt->mroute_sk, NULL);
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
mroute_clean_tables(mrt);
}
}
@@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -ENOENT;
if (optname != MRT_INIT) {
- if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
!capable(CAP_NET_ADMIN))
return -EACCES;
}
@@ -1224,13 +1224,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
ret = ip_ra_control(sk, 1, mrtsock_destruct);
if (ret == 0) {
- rcu_assign_pointer(mrt->mroute_sk, sk);
+ RCU_INIT_POINTER(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
case MRT_DONE:
- if (sk != rcu_dereference_raw(mrt->mroute_sk))
+ if (sk != rcu_access_pointer(mrt->mroute_sk))
return -EACCES;
return ip_ra_control(sk, 0, NULL);
case MRT_ADD_VIF:
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 703f366fd23..7b22382ff0e 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_amanda_fini(void)
{
- rcu_assign_pointer(nf_nat_amanda_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_amanda_init(void)
{
BUG_ON(nf_nat_amanda_hook != NULL);
- rcu_assign_pointer(nf_nat_amanda_hook, help);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3346de5d94d..447bc5cfdc6 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -514,7 +514,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
- rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
+ RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
out:
spin_unlock_bh(&nf_nat_lock);
return ret;
@@ -525,7 +525,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
{
spin_lock_bh(&nf_nat_lock);
- rcu_assign_pointer(nf_nat_protos[proto->protonum],
+ RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
spin_unlock_bh(&nf_nat_lock);
synchronize_rcu();
@@ -736,10 +736,10 @@ static int __init nf_nat_init(void)
/* Sew in builtin protocols. */
spin_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
+ RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
spin_unlock_bh(&nf_nat_lock);
/* Initialize fake conntrack so that NAT will skip it */
@@ -748,12 +748,12 @@ static int __init nf_nat_init(void)
l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
BUG_ON(nf_nat_seq_adjust_hook != NULL);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+ RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+ RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
nfnetlink_parse_nat_setup);
BUG_ON(nf_ct_nat_offset != NULL);
- rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
+ RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
return 0;
cleanup_extend:
@@ -766,9 +766,9 @@ static void __exit nf_nat_cleanup(void)
unregister_pernet_subsys(&nf_nat_net_ops);
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
- rcu_assign_pointer(nf_ct_nat_offset, NULL);
+ RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
+ RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
+ RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
synchronize_net();
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index dc73abb3fe2..e462a957d08 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -113,14 +113,14 @@ out:
static void __exit nf_nat_ftp_fini(void)
{
- rcu_assign_pointer(nf_nat_ftp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_ftp_init(void)
{
BUG_ON(nf_nat_ftp_hook != NULL);
- rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 790f3160e01..b9a1136addb 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -581,30 +581,30 @@ static int __init init(void)
BUG_ON(nat_callforwarding_hook != NULL);
BUG_ON(nat_q931_hook != NULL);
- rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
- rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
- rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
- rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
- rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- rcu_assign_pointer(nat_t120_hook, nat_t120);
- rcu_assign_pointer(nat_h245_hook, nat_h245);
- rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
- rcu_assign_pointer(nat_q931_hook, nat_q931);
+ RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
+ RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
+ RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
+ RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ RCU_INIT_POINTER(nat_t120_hook, nat_t120);
+ RCU_INIT_POINTER(nat_h245_hook, nat_h245);
+ RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
+ RCU_INIT_POINTER(nat_q931_hook, nat_q931);
return 0;
}
/****************************************************************************/
static void __exit fini(void)
{
- rcu_assign_pointer(set_h245_addr_hook, NULL);
- rcu_assign_pointer(set_h225_addr_hook, NULL);
- rcu_assign_pointer(set_sig_addr_hook, NULL);
- rcu_assign_pointer(set_ras_addr_hook, NULL);
- rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
- rcu_assign_pointer(nat_t120_hook, NULL);
- rcu_assign_pointer(nat_h245_hook, NULL);
- rcu_assign_pointer(nat_callforwarding_hook, NULL);
- rcu_assign_pointer(nat_q931_hook, NULL);
+ RCU_INIT_POINTER(set_h245_addr_hook, NULL);
+ RCU_INIT_POINTER(set_h225_addr_hook, NULL);
+ RCU_INIT_POINTER(set_sig_addr_hook, NULL);
+ RCU_INIT_POINTER(set_ras_addr_hook, NULL);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
+ RCU_INIT_POINTER(nat_t120_hook, NULL);
+ RCU_INIT_POINTER(nat_h245_hook, NULL);
+ RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
+ RCU_INIT_POINTER(nat_q931_hook, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 535e1a80235..979ae165f4e 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_irc_fini(void)
{
- rcu_assign_pointer(nf_nat_irc_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_irc_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_irc_init(void)
{
BUG_ON(nf_nat_irc_hook != NULL);
- rcu_assign_pointer(nf_nat_irc_hook, help);
+ RCU_INIT_POINTER(nf_nat_irc_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 4c060038d29..3e8284ba46b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void)
nf_nat_need_gre();
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
BUG_ON(nf_nat_pptp_hook_inbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
static void __exit nf_nat_helper_pptp_fini(void)
{
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e40cf7816fd..78844d9208f 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -528,13 +528,13 @@ err1:
static void __exit nf_nat_sip_fini(void)
{
- rcu_assign_pointer(nf_nat_sip_hook, NULL);
- rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
- rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
synchronize_rcu();
}
@@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void)
BUG_ON(nf_nat_sdp_port_hook != NULL);
BUG_ON(nf_nat_sdp_session_hook != NULL);
BUG_ON(nf_nat_sdp_media_hook != NULL);
- rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
- rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
- rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
- rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
- rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
+ RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip);
+ RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
+ RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect);
+ RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
+ RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
+ RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
+ RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 076b7c8c4aa..d1cb412c18e 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1310,7 +1310,7 @@ static int __init nf_nat_snmp_basic_init(void)
int ret = 0;
BUG_ON(nf_nat_snmp_hook != NULL);
- rcu_assign_pointer(nf_nat_snmp_hook, help);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
@@ -1322,7 +1322,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
- rcu_assign_pointer(nf_nat_snmp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index a6e606e8482..92900482ede 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void)
#ifdef CONFIG_XFRM
BUG_ON(ip_nat_decode_session != NULL);
- rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
+ RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session);
#endif
ret = nf_nat_rule_init();
if (ret < 0) {
@@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void)
nf_nat_rule_cleanup();
cleanup_decode_session:
#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
+ RCU_INIT_POINTER(ip_nat_decode_session, NULL);
synchronize_net();
#endif
return ret;
@@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void)
nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
nf_nat_rule_cleanup();
#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
+ RCU_INIT_POINTER(ip_nat_decode_session, NULL);
synchronize_net();
#endif
/* Conntrack caches are unregistered in nf_conntrack_cleanup */
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 7274a43c7a1..a2901bf829c 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_tftp_fini(void)
{
- rcu_assign_pointer(nf_nat_tftp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_tftp_init(void)
{
BUG_ON(nf_nat_tftp_hook != NULL);
- rcu_assign_pointer(nf_nat_tftp_hook, help);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, help);
return 0;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 075212e41b8..155138d8ec8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -120,7 +120,6 @@
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
-static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -324,7 +323,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
+ if (!rcu_access_pointer(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -350,7 +349,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
do {
if (--st->bucket < 0)
return NULL;
- } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
+ } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
@@ -761,7 +760,7 @@ static void rt_do_flush(struct net *net, int process_context)
if (process_context && need_resched())
cond_resched();
- rth = rcu_dereference_raw(rt_hash_table[i].chain);
+ rth = rcu_access_pointer(rt_hash_table[i].chain);
if (!rth)
continue;
@@ -1309,7 +1308,12 @@ static void rt_del(unsigned hash, struct rtable *rt)
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
__be32 saddr, struct net_device *dev)
{
+ int s, i;
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct rtable *rt;
+ __be32 skeys[2] = { saddr, 0 };
+ int ikeys[2] = { dev->ifindex, 0 };
+ struct flowi4 fl4;
struct inet_peer *peer;
struct net *net;
@@ -1332,13 +1336,34 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
goto reject_redirect;
}
- peer = inet_getpeer_v4(daddr, 1);
- if (peer) {
- peer->redirect_learned.a4 = new_gw;
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = daddr;
+ for (s = 0; s < 2; s++) {
+ for (i = 0; i < 2; i++) {
+ fl4.flowi4_oif = ikeys[i];
+ fl4.saddr = skeys[s];
+ rt = __ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ continue;
- inet_putpeer(peer);
+ if (rt->dst.error || rt->dst.dev != dev ||
+ rt->rt_gateway != old_gw) {
+ ip_rt_put(rt);
+ continue;
+ }
+
+ if (!rt->peer)
+ rt_bind_peer(rt, rt->rt_dst, 1);
+
+ peer = rt->peer;
+ if (peer) {
+ peer->redirect_learned.a4 = new_gw;
+ atomic_inc(&__rt_peer_genid);
+ }
- atomic_inc(&__rt_peer_genid);
+ ip_rt_put(rt);
+ return;
+ }
}
return;
@@ -1568,11 +1593,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
est_mtu = mtu;
peer->pmtu_learned = mtu;
peer->pmtu_expires = pmtu_expires;
+ atomic_inc(&__rt_peer_genid);
}
inet_putpeer(peer);
-
- atomic_inc(&__rt_peer_genid);
}
return est_mtu ? : new_mtu;
}
@@ -3121,13 +3145,6 @@ static ctl_table ipv4_route_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "gc_interval",
- .data = &ip_rt_gc_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
.procname = "redirect_load",
.data = &ip_rt_redirect_load,
.maxlen = sizeof(int),
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3bc5c8f7c71..d7b89b12f6d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -265,7 +265,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
{
struct tcp_options_received tcp_opt;
- u8 *hash_location;
+ const u8 *hash_location;
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46febcacb72..34f5db1e1c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
unsigned int mask;
struct sock *sk = sock->sk;
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_LISTEN)
@@ -524,11 +524,11 @@ EXPORT_SYMBOL(tcp_ioctl);
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
tp->pushed_seq = tp->write_seq;
}
-static inline int forced_push(struct tcp_sock *tp)
+static inline int forced_push(const struct tcp_sock *tp)
{
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
@@ -540,7 +540,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
skb->csum = 0;
tcb->seq = tcb->end_seq = tp->write_seq;
- tcb->flags = TCPHDR_ACK;
+ tcb->tcp_flags = TCPHDR_ACK;
tcb->sacked = 0;
skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
@@ -813,7 +813,7 @@ new_segment:
goto wait_for_memory;
if (can_coalesce) {
- skb_shinfo(skb)->frags[i - 1].size += copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
@@ -830,7 +830,7 @@ new_segment:
skb_shinfo(skb)->gso_segs = 0;
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
poffset += copy;
@@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage);
#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-static inline int select_size(struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, int sg)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
@@ -1058,8 +1058,7 @@ new_segment:
/* Update the skb. */
if (merge) {
- skb_shinfo(skb)->frags[i - 1].size +=
- copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
@@ -1074,7 +1073,7 @@ new_segment:
}
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
@@ -1194,13 +1193,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
struct tcp_sock *tp = tcp_sk(sk);
int time_to_ack = 0;
-#if TCP_DEBUG
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
"cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
-#endif
if (inet_csk_ack_scheduled(sk)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2409,7 +2406,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
unsigned int optlen)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
if (level != SOL_TCP)
return icsk->icsk_af_ops->setsockopt(sk, level, optname,
@@ -2431,9 +2428,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
/* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(struct sock *sk, struct tcp_info *info)
+void tcp_get_info(const struct sock *sk, struct tcp_info *info)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
@@ -2455,8 +2452,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}
- if (tp->ecn_flags&TCP_ECN_OK)
+ if (tp->ecn_flags & TCP_ECN_OK)
info->tcpi_options |= TCPI_OPT_ECN;
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+ info->tcpi_options |= TCPI_OPT_ECN_SEEN;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
@@ -2857,26 +2856,25 @@ EXPORT_SYMBOL(tcp_gro_complete);
#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
{
int cpu;
+
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
- if (p) {
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- kfree(p);
- }
+ struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
+
+ if (p->md5_desc.tfm)
+ crypto_free_hash(p->md5_desc.tfm);
}
free_percpu(pool);
}
void tcp_free_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *pool = NULL;
+ struct tcp_md5sig_pool __percpu *pool = NULL;
spin_lock_bh(&tcp_md5sig_pool_lock);
if (--tcp_md5sig_users == 0) {
@@ -2889,30 +2887,24 @@ void tcp_free_md5sig_pool(void)
}
EXPORT_SYMBOL(tcp_free_md5sig_pool);
-static struct tcp_md5sig_pool * __percpu *
+static struct tcp_md5sig_pool __percpu *
__tcp_alloc_md5sig_pool(struct sock *sk)
{
int cpu;
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
- pool = alloc_percpu(struct tcp_md5sig_pool *);
+ pool = alloc_percpu(struct tcp_md5sig_pool);
if (!pool)
return NULL;
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p;
struct crypto_hash *hash;
- p = kzalloc(sizeof(*p), sk->sk_allocation);
- if (!p)
- goto out_free;
- *per_cpu_ptr(pool, cpu) = p;
-
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (!hash || IS_ERR(hash))
goto out_free;
- p->md5_desc.tfm = hash;
+ per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
}
return pool;
out_free:
@@ -2920,9 +2912,9 @@ out_free:
return NULL;
}
-struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
int alloc = 0;
retry:
@@ -2941,7 +2933,7 @@ retry:
if (alloc) {
/* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
p = __tcp_alloc_md5sig_pool(sk);
spin_lock_bh(&tcp_md5sig_pool_lock);
@@ -2974,7 +2966,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
local_bh_disable();
@@ -2985,7 +2977,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
spin_unlock(&tcp_md5sig_pool_lock);
if (p)
- return *this_cpu_ptr(p);
+ return this_cpu_ptr(p);
local_bh_enable();
return NULL;
@@ -3000,23 +2992,25 @@ void tcp_put_md5sig_pool(void)
EXPORT_SYMBOL(tcp_put_md5sig_pool);
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
- struct tcphdr *th)
+ const struct tcphdr *th)
{
struct scatterlist sg;
+ struct tcphdr hdr;
int err;
- __sum16 old_checksum = th->check;
- th->check = 0;
+ /* We are not allowed to change tcphdr, make a local copy */
+ memcpy(&hdr, th, sizeof(hdr));
+ hdr.check = 0;
+
/* options aren't included in the hash */
- sg_init_one(&sg, th, sizeof(struct tcphdr));
- err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
- th->check = old_checksum;
+ sg_init_one(&sg, &hdr, sizeof(hdr));
+ err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
return err;
}
EXPORT_SYMBOL(tcp_md5_hash_header);
int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- struct sk_buff *skb, unsigned header_len)
+ const struct sk_buff *skb, unsigned int header_len)
{
struct scatterlist sg;
const struct tcphdr *tp = tcp_hdr(skb);
@@ -3035,8 +3029,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- sg_set_page(&sg, f->page, f->size, f->page_offset);
- if (crypto_hash_update(desc, &sg, f->size))
+ struct page *page = skb_frag_page(f);
+ sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+ if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
return 1;
}
@@ -3048,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
}
EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
{
struct scatterlist sg;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d73aab3fbfc..52b5c2d0ecd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
if (tcp_hdr(skb)->cwr)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -217,32 +217,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
- if (tp->ecn_flags & TCP_ECN_OK) {
- if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ if (!(tp->ecn_flags & TCP_ECN_OK))
+ return;
+
+ switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+ case INET_ECN_NOT_ECT:
/* Funny extension: if ECT is not set on a segment,
- * it is surely retransmit. It is not in ECN RFC,
- * but Linux follows this rule. */
- else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
+ * and we already seen ECT on a previous segment,
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
tcp_enter_quickack_mode((struct sock *)tp);
+ break;
+ case INET_ECN_CE:
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ /* fallinto */
+ default:
+ tp->ecn_flags |= TCP_ECN_SEEN;
}
}
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
return 1;
@@ -256,14 +265,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
static void tcp_fixup_sndbuf(struct sock *sk)
{
- int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
- sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
- if (sk->sk_sndbuf < 3 * sndmem) {
- sk->sk_sndbuf = 3 * sndmem;
- if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
- sk->sk_sndbuf = sysctl_tcp_wmem[2];
- }
+ sndmem *= TCP_INIT_CWND;
+ if (sk->sk_sndbuf < sndmem)
+ sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -309,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
return 0;
}
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -339,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
- int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ u32 mss = tcp_sk(sk)->advmss;
+ u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+ int rcvmem;
- /* Try to select rcvbuf so that 4 mss-sized segments
- * will fit to window and corresponding skbs will fit to our rcvbuf.
- * (was 3; 4 is minimum to allow fast retransmit to work.)
+ /* Limit to 10 segments if mss <= 1460,
+ * or 14600/mss segments, with a minimum of two segments.
*/
- while (tcp_win_from_space(rcvmem) < tp->advmss)
+ if (mss > 1460)
+ icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+
+ rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
+ while (tcp_win_from_space(rcvmem) < mss)
rcvmem += 128;
- if (sk->sk_rcvbuf < 4 * rcvmem)
- sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+
+ rcvmem *= icwnd;
+
+ if (sk->sk_rcvbuf < rcvmem)
+ sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
@@ -416,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)
*/
void tcp_initialize_rcv_mss(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
hint = min(hint, tp->rcv_wnd / 2);
@@ -531,8 +544,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
space /= tp->advmss;
if (!space)
space = 1;
- rcvmem = (tp->advmss + MAX_TCP_HEADER +
- 16 + sizeof(struct sk_buff));
+ rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128;
space *= rcvmem;
@@ -812,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)
}
}
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
@@ -1204,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
tp->lost_retrans_low = new_low_seq;
}
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp, int num_sacks,
u32 prior_snd_una)
{
@@ -1298,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
return in_sack;
}
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
int dup_sack, int pcount)
{
@@ -1438,7 +1450,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
}
- TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+ TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
if (skb == tcp_highest_sack(sk))
tcp_advance_highest_sack(sk, skb);
@@ -1453,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
/* I wish gso_size would have a bit more sane initialization than
* something-or-zero which complicates things
*/
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
{
return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}
/* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
{
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}
@@ -1708,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
return skb;
}
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}
static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
u32 prior_snd_una)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- unsigned char *ptr = (skb_transport_header(ack_skb) +
- TCP_SKB_CB(ack_skb)->sacked);
+ const unsigned char *ptr = (skb_transport_header(ack_skb) +
+ TCP_SKB_CB(ack_skb)->sacked);
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
@@ -2284,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
return 0;
}
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
{
return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
}
@@ -2304,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
* they differ. Since neither occurs due to loss, TCP should really
* ignore them.
*/
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+ const struct sk_buff *skb)
{
return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
}
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
return tp->packets_out &&
tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2627,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
/* Nothing was retransmitted or returned timestamp is less
* than timestamp of the first retransmission.
*/
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
{
return !tp->retrans_stamp ||
(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2688,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
{
return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}
@@ -2752,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
* that successive retransmissions of a segment must not advance
* retrans_stamp under any conditions.
*/
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
if (tp->retrans_out)
@@ -2828,9 +2841,13 @@ static int tcp_try_undo_loss(struct sock *sk)
static inline void tcp_complete_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- /* Do not moderate cwnd if it's already undone in cwr or recovery */
- if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
- tp->snd_cwnd = tp->snd_ssthresh;
+
+ /* Do not moderate cwnd if it's already undone in cwr or recovery. */
+ if (tp->undo_marker) {
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ else /* PRR */
+ tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2948,6 +2965,38 @@ void tcp_simple_retransmit(struct sock *sk)
}
EXPORT_SYMBOL(tcp_simple_retransmit);
+/* This function implements the PRR algorithm, specifcally the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ * 1) If the packets in flight is larger than ssthresh, PRR spreads the
+ * cwnd reductions across a full RTT.
+ * 2) If packets in flight is lower than ssthresh (such as due to excess
+ * losses and/or application stalls), do not perform any further cwnd
+ * reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+ int fast_rexmit, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+ if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+ u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+ tp->prior_cwnd - 1;
+ sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = min_t(int, delta,
+ max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked) + 1);
+ }
+
+ sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2959,7 +3008,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+ int newly_acked_sacked, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -3109,13 +3159,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
+ tp->prior_cwnd = tp->snd_cwnd;
+ tp->prr_delivered = 0;
+ tp->prr_out = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
fast_rexmit = 1;
}
if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_down(sk, flag);
+ tp->prr_delivered += newly_acked_sacked;
+ tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
tcp_xmit_retransmit_queue(sk);
}
@@ -3192,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
static void tcp_rearm_rto(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3296,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
* connection startup slow start one packet too
* quickly. This is severely frowned upon behavior.
*/
- if (!(scb->flags & TCPHDR_SYN)) {
+ if (!(scb->tcp_flags & TCPHDR_SYN)) {
flag |= FLAG_DATA_ACKED;
} else {
flag |= FLAG_SYN_ACKED;
@@ -3444,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
* and in FreeBSD. NetBSD's one is even worse.) is wrong.
*/
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
u32 ack_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -3620,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
}
/* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -3630,6 +3684,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 prior_in_flight;
u32 prior_fackets;
int prior_packets;
+ int prior_sacked = tp->sacked_out;
+ int newly_acked_sacked = 0;
int frto_cwnd = 0;
/* If the ack is older than previous acks
@@ -3701,6 +3757,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ newly_acked_sacked = (prior_packets - prior_sacked) -
+ (tp->packets_out - tp->sacked_out);
+
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
/* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3713,7 +3772,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
- flag);
+ newly_acked_sacked, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3752,14 +3811,14 @@ old_ack:
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
- u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+ const u8 **hvpp, int estab)
{
- unsigned char *ptr;
- struct tcphdr *th = tcp_hdr(skb);
+ const unsigned char *ptr;
+ const struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff * 4) - sizeof(struct tcphdr);
- ptr = (unsigned char *)(th + 1);
+ ptr = (const unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
while (length > 0) {
@@ -3870,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
EXPORT_SYMBOL(tcp_parse_options);
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
{
- __be32 *ptr = (__be32 *)(th + 1);
+ const __be32 *ptr = (const __be32 *)(th + 1);
if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -3889,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
*/
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
- struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+ const struct tcphdr *th,
+ struct tcp_sock *tp, const u8 **hvpp)
{
/* In the spirit of fast parsing, compare doff directly to constant
* values. Because equality is used, short doff can be ignored here.
@@ -3911,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
/*
* Parse MD5 Signature option
*/
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
{
- int length = (th->doff << 2) - sizeof (*th);
- u8 *ptr = (u8*)(th + 1);
+ int length = (th->doff << 2) - sizeof(*th);
+ const u8 *ptr = (const u8 *)(th + 1);
/* If the TCP option is too short, we can short cut */
if (length < TCPOLEN_MD5SIG)
@@ -3991,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
{
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcphdr *th = tcp_hdr(skb);
u32 seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -4031,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
{
return !before(end_seq, tp->rcv_wup) &&
!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4076,7 +4136,7 @@ static void tcp_reset(struct sock *sk)
*
* If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
*/
-static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+static void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4188,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4347,7 +4407,7 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tcp_hdr(skb)->fin)
- tcp_fin(skb, sk, tcp_hdr(skb));
+ tcp_fin(sk);
}
}
@@ -4375,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1;
@@ -4429,7 +4489,7 @@ queue_and_out:
if (skb->len)
tcp_event_data_recv(sk, skb);
if (th->fin)
- tcp_fin(skb, sk, th);
+ tcp_fin(sk);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
tcp_ofo_queue(sk);
@@ -4859,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* If the user specified a specific send buffer setting, do
* not modify it.
@@ -4895,8 +4955,10 @@ static void tcp_new_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) {
- int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
- MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(max_t(u32,
+ tp->rx_opt.mss_clamp,
+ tp->mss_cache) +
+ MAX_TCP_HEADER);
int demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
sndmem *= 2 * demanded;
@@ -4968,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
* either form (or just set the sysctl tcp_stdurg).
*/
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
@@ -5034,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
}
/* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5155,9 +5217,9 @@ out:
* play significant role here.
*/
static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, int syn_inerr)
+ const struct tcphdr *th, int syn_inerr)
{
- u8 *hash_location;
+ const u8 *hash_location;
struct tcp_sock *tp = tcp_sk(sk);
/* RFC1323: H1. Apply PAWS check first. */
@@ -5238,7 +5300,7 @@ discard:
* tcp_data_queue when everything is OK.
*/
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
+ const struct tcphdr *th, unsigned int len)
{
struct tcp_sock *tp = tcp_sk(sk);
int res;
@@ -5449,9 +5511,9 @@ discard:
EXPORT_SYMBOL(tcp_rcv_established);
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
+ const struct tcphdr *th, unsigned int len)
{
- u8 *hash_location;
+ const u8 *hash_location;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5726,7 +5788,7 @@ reset_and_undo:
*/
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned len)
+ const struct tcphdr *th, unsigned int len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7963e03f106..0ea10eefa60 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency);
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
__be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, struct tcphdr *th);
+ __be32 daddr, __be32 saddr, const struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
@@ -104,7 +104,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr,
@@ -552,7 +552,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb,
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
@@ -590,7 +590,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
net = dev_net(skb_dst(skb)->dev);
+ arg.tos = ip_hdr(skb)->tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
@@ -666,9 +667,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts, int oif,
struct tcp_md5sig_key *key,
- int reply_flags)
+ int reply_flags, u8 tos)
{
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
if (oif)
arg.bound_dev_if = oif;
-
+ arg.tos = tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
- tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
+ tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
+ tw->tw_tos
);
inet_twsk_put(tw);
@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
req->ts_recent,
0,
tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
- inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+ ip_hdr(skb)->tos);
}
/*
@@ -1090,7 +1093,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
}
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, struct tcphdr *th)
+ __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
struct hash_desc *desc;
@@ -1122,12 +1125,12 @@ clear_hash_noput:
}
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
- struct sock *sk, struct request_sock *req,
- struct sk_buff *skb)
+ const struct sock *sk, const struct request_sock *req,
+ const struct sk_buff *skb)
{
struct tcp_md5sig_pool *hp;
struct hash_desc *desc;
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
__be32 saddr, daddr;
if (sk) {
@@ -1172,7 +1175,7 @@ clear_hash_noput:
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
+static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
/*
* This gets called for each TCP segment that arrives
@@ -1182,10 +1185,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
* o MD5 hash and we're not expecting one.
* o MD5 hash and its wrong.
*/
- __u8 *hash_location = NULL;
+ const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
int genhash;
unsigned char newhash[16];
@@ -1248,7 +1251,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
- u8 *hash_location;
+ const u8 *hash_location;
struct request_sock *req;
struct inet_request_sock *ireq;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1588,7 +1591,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1605,7 +1608,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb->rxhash);
+ sock_rps_save_rxhash(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1613,7 +1616,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
} else
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
@@ -1645,7 +1648,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
int tcp_v4_rcv(struct sk_buff *skb)
{
const struct iphdr *iph;
- struct tcphdr *th;
+ const struct tcphdr *th;
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1680,7 +1683,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = iph->tos;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1809,7 +1812,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer);
void *tcp_v4_tw_get_peer(struct sock *sk)
{
- struct inet_timewait_sock *tw = inet_twsk(sk);
+ const struct inet_timewait_sock *tw = inet_twsk(sk);
return inet_getpeer_v4(tw->tw_daddr, 1);
}
@@ -2381,7 +2384,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
}
EXPORT_SYMBOL(tcp_proc_unregister);
-static void get_openreq4(struct sock *sk, struct request_sock *req,
+static void get_openreq4(const struct sock *sk, const struct request_sock *req,
struct seq_file *f, int i, int uid, int *len)
{
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -2411,9 +2414,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
int timer_active;
unsigned long timer_expires;
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
@@ -2462,7 +2465,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
len);
}
-static void get_timewait4_sock(struct inet_timewait_sock *tw,
+static void get_timewait4_sock(const struct inet_timewait_sock *tw,
struct seq_file *f, int i, int *len)
{
__be32 dest, src;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ce3d06dce6..85a2fbebd7e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th)
{
struct tcp_options_received tmp_opt;
- u8 *hash_location;
+ const u8 *hash_location;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
int paws_reject = 0;
@@ -567,7 +567,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock **prev)
{
struct tcp_options_received tmp_opt;
- u8 *hash_location;
+ const u8 *hash_location;
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0964d..980b98f6288 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
/* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
@@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
* Anything in between SND.UNA...SND.UNA+SND.WND also can be already
* invalid. OK, let's make this for now:
*/
-static inline __u32 tcp_acceptable_seq(struct sock *sk)
+static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
if (!before(tcp_wnd_end(tp), tp->snd_nxt))
return tp->snd_nxt;
@@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
static __u16 tcp_advertise_mss(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct dst_entry *dst = __sk_dst_get(sk);
+ const struct dst_entry *dst = __sk_dst_get(sk);
int mss = tp->advmss;
if (dst) {
@@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk)
/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
* This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
+static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
{
struct tcp_sock *tp = tcp_sk(sk);
s32 delta = tcp_time_stamp - tp->lsndtime;
@@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
- struct sk_buff *skb, struct sock *sk)
+ struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
@@ -295,11 +295,11 @@ static u16 tcp_select_window(struct sock *sk)
}
/* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
{
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
}
/* Packet ECN state for a SYN. */
@@ -309,13 +309,13 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
tp->ecn_flags = 0;
if (sysctl_tcp_ecn == 1) {
- TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
}
}
static __inline__ void
-TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
+TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
{
if (inet_rsk(req)->ecn_ok)
th->ece = 1;
@@ -356,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- TCP_SKB_CB(skb)->flags = flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
skb_shinfo(skb)->gso_segs = 1;
@@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*/
static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5) {
+ struct tcp_md5sig_key **md5)
+{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_cookie_values *cvp = tp->cookie_values;
unsigned remaining = MAX_TCP_OPTION_SPACE;
@@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk,
*/
static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5) {
+ struct tcp_md5sig_key **md5)
+{
struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
struct tcp_sock *tp = tcp_sk(sk);
unsigned size = 0;
@@ -826,7 +828,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
- if (unlikely(tcb->flags & TCPHDR_SYN))
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
else
tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -850,9 +852,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
- tcb->flags);
+ tcb->tcp_flags);
- if (unlikely(tcb->flags & TCPHDR_SYN)) {
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
@@ -875,7 +877,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
tcp_options_write((__be32 *)(th + 1), tp, &opts);
- if (likely((tcb->flags & TCPHDR_SYN) == 0))
+ if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
TCP_ECN_send(sk, skb, tcp_header_size);
#ifdef CONFIG_TCP_MD5SIG
@@ -889,11 +891,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb);
- if (likely(tcb->flags & TCPHDR_ACK))
+ if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
if (skb->len != tcp_header_size)
- tcp_event_data_sent(tp, skb, sk);
+ tcp_event_data_sent(tp, sk);
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
}
/* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
if (skb->len <= mss_now || !sk_can_gso(sk) ||
@@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
/* When a modification to fackets out becomes necessary, we need to check
* skb is counted to fackets_out or not.
*/
-static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
int decr)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
-static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
+static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1032,9 +1034,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
/* PSH and FIN should only be set in the second packet. */
- flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
- TCP_SKB_CB(buff)->flags = flags;
+ flags = TCP_SKB_CB(skb)->tcp_flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
+ TCP_SKB_CB(buff)->tcp_flags = flags;
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -1094,14 +1096,16 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
eat = len;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
- eat -= skb_shinfo(skb)->frags[i].size;
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (size <= eat) {
+ skb_frag_unref(skb, i);
+ eat -= size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
- skb_shinfo(skb)->frags[k].size -= eat;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
eat = 0;
}
k++;
@@ -1144,10 +1148,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
}
/* Calculate MSS. Not accounting for SACKs here. */
-int tcp_mtu_to_mss(struct sock *sk, int pmtu)
+int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
{
- struct tcp_sock *tp = tcp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
int mss_now;
/* Calculate base mss without TCP options:
@@ -1173,10 +1177,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
}
/* Inverse of above */
-int tcp_mss_to_mtu(struct sock *sk, int mss)
+int tcp_mss_to_mtu(const struct sock *sk, int mss)
{
- struct tcp_sock *tp = tcp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
int mtu;
mtu = mss +
@@ -1250,8 +1254,8 @@ EXPORT_SYMBOL(tcp_sync_mss);
*/
unsigned int tcp_current_mss(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
- struct dst_entry *dst = __sk_dst_get(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const struct dst_entry *dst = __sk_dst_get(sk);
u32 mss_now;
unsigned header_len;
struct tcp_out_options opts;
@@ -1311,10 +1315,10 @@ static void tcp_cwnd_validate(struct sock *sk)
* modulo only when the receiver window alone is the limiting factor or
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
-static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
unsigned int mss_now, unsigned int cwnd)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
u32 needed, window, cwnd_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -1334,13 +1338,14 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
/* Can at least one segment of SKB be sent right now, according to the
* congestion window rules? If so, return how many segments are allowed.
*/
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
- struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+ const struct sk_buff *skb)
{
u32 in_flight, cwnd;
/* Don't be strict about the congestion window for the final FIN. */
- if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
+ tcp_skb_pcount(skb) == 1)
return 1;
in_flight = tcp_packets_in_flight(tp);
@@ -1355,7 +1360,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
@@ -1393,7 +1398,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
/* Return non-zero if the Nagle test allows this packet to be
* sent now.
*/
-static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
unsigned int cur_mss, int nonagle)
{
/* Nagle rule does not apply to frames, which sit in the middle of the
@@ -1409,7 +1414,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
* Nagle can be ignored during F-RTO too (see RFC4138).
*/
if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
- (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return 1;
if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1419,7 +1424,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
}
/* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
unsigned int cur_mss)
{
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1434,10 +1439,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
* should be put on the wire right now. If so, it returns the number of
* packets allowed by the congestion window.
*/
-static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
unsigned int cur_mss, int nonagle)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
unsigned int cwnd_quota;
tcp_init_tso_segs(sk, skb, cur_mss);
@@ -1455,7 +1460,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
/* Test if sending is allowed right now. */
int tcp_may_send_now(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = tcp_send_head(sk);
return skb &&
@@ -1497,9 +1502,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
/* PSH and FIN should only be set in the second packet. */
- flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
- TCP_SKB_CB(buff)->flags = flags;
+ flags = TCP_SKB_CB(skb)->tcp_flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
+ TCP_SKB_CB(buff)->tcp_flags = flags;
/* This packet was never sent out yet, so no SACK bits. */
TCP_SKB_CB(buff)->sacked = 0;
@@ -1530,7 +1535,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
u32 send_win, cong_win, limit, in_flight;
int win_divisor;
- if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto send_now;
if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1657,7 +1662,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
- TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
+ TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
nskb->ip_summed = skb->ip_summed;
@@ -1677,11 +1682,11 @@ static int tcp_mtu_probe(struct sock *sk)
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
* Throw it away. */
- TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
+ TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
- TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
+ TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
@@ -1796,11 +1801,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tcp_event_new_data_sent(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
- sent_pkts++;
+ sent_pkts += tcp_skb_pcount(skb);
if (push_one)
break;
}
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+ tp->prr_out += sent_pkts;
if (likely(sent_pkts)) {
tcp_cwnd_validate(sk);
@@ -1985,7 +1992,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
/* Merge over control information. This moves PSH/FIN etc. over */
- TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
+ TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
/* All done, get rid of second SKB and account for it so
* packet counting does not break.
@@ -2003,7 +2010,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
/* Check if coalescing SKBs is legal. */
-static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
+static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
{
if (tcp_skb_pcount(skb) > 1)
return 0;
@@ -2033,7 +2040,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (!sysctl_tcp_retrans_collapse)
return;
- if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2125,12 +2132,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* since it is cheap to do so and saves bytes on the network.
*/
if (skb->len > 0 &&
- (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
/* Reuse, even though it does some unnecessary work */
tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
- TCP_SKB_CB(skb)->flags);
+ TCP_SKB_CB(skb)->tcp_flags);
skb->ip_summed = CHECKSUM_NONE;
}
}
@@ -2179,7 +2186,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
static int tcp_can_forward_retransmit(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Forward retransmissions are possible only during Recovery. */
if (icsk->icsk_ca_state != TCP_CA_Recovery)
@@ -2294,6 +2301,9 @@ begin_fwd:
return;
NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+ tp->prr_out += tcp_skb_pcount(skb);
+
if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
@@ -2317,7 +2327,7 @@ void tcp_send_fin(struct sock *sk)
mss_now = tcp_current_mss(sk);
if (tcp_send_head(sk) != NULL) {
- TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
} else {
@@ -2379,11 +2389,11 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *skb;
skb = tcp_write_queue_head(sk);
- if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
+ if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
}
- if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
+ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
@@ -2397,7 +2407,7 @@ int tcp_send_synack(struct sock *sk)
skb = nskb;
}
- TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
TCP_ECN_send_synack(tcp_sk(sk), skb);
}
TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2542,7 +2552,7 @@ EXPORT_SYMBOL(tcp_make_synack);
/* Do all connect socket setups that can be done AF independent. */
static void tcp_connect_init(struct sock *sk)
{
- struct dst_entry *dst = __sk_dst_get(sk);
+ const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
@@ -2794,13 +2804,13 @@ int tcp_write_wakeup(struct sock *sk)
if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
skb->len > mss) {
seg_size = min(seg_size, mss);
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
if (tcp_fragment(sk, skb, seg_size, mss))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ecd44b0c45f..2e0f0af76c1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -334,7 +334,6 @@ void tcp_retransmit_timer(struct sock *sk)
* connection. If the socket is an orphan, time it out,
* we cannot allow such beasts to hang infinitely.
*/
-#ifdef TCP_DEBUG
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_family == AF_INET) {
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
@@ -349,7 +348,6 @@ void tcp_retransmit_timer(struct sock *sk)
inet->inet_num, tp->snd_una, tp->snd_nxt);
}
#endif
-#endif
if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
tcp_write_err(sk);
goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b5a19340a9..ebaa96bd346 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags)
sk->sk_state = TCP_CLOSE;
inet->inet_daddr = 0;
inet->inet_dport = 0;
- sock_rps_save_rxhash(sk, 0);
+ sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
@@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
int rc;
if (inet_sk(sk)->inet_daddr)
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
rc = ip_queue_rcv_skb(sk, skb);
if (rc < 0) {
@@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_dereference_raw(sk->sk_filter)) {
- if (udp_lib_checksum_complete(skb))
- goto drop;
- }
+ if (rcu_access_pointer(sk->sk_filter) &&
+ udp_lib_checksum_complete(skb))
+ goto drop;
if (sk_rcvqueues_full(sk, skb))
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fc5368ad2b0..a0b4c5da8d4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
struct rtable *rt = (struct rtable *)xdst->route;
const struct flowi4 *fl4 = &fl->u.ip4;
- rt->rt_key_dst = fl4->daddr;
- rt->rt_key_src = fl4->saddr;
- rt->rt_key_tos = fl4->flowi4_tos;
- rt->rt_route_iif = fl4->flowi4_iif;
- rt->rt_iif = fl4->flowi4_iif;
- rt->rt_oif = fl4->flowi4_oif;
- rt->rt_mark = fl4->flowi4_mark;
+ xdst->u.rt.rt_key_dst = fl4->daddr;
+ xdst->u.rt.rt_key_src = fl4->saddr;
+ xdst->u.rt.rt_key_tos = fl4->flowi4_tos;
+ xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
+ xdst->u.rt.rt_iif = fl4->flowi4_iif;
+ xdst->u.rt.rt_oif = fl4->flowi4_oif;
+ xdst->u.rt.rt_mark = fl4->flowi4_mark;
xdst->u.dst.dev = dev;
dev_hold(dev);