summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c20
-rw-r--r--net/ipv4/ah4.c18
-rw-r--r--net/ipv4/arp.c21
-rw-r--r--net/ipv4/datagram.c25
-rw-r--r--net/ipv4/devinet.c215
-rw-r--r--net/ipv4/esp4.c12
-rw-r--r--net/ipv4/inet_fragment.c39
-rw-r--r--net/ipv4/ip_fragment.c28
-rw-r--r--net/ipv4/ip_gre.c10
-rw-r--r--net/ipv4/ip_input.c7
-rw-r--r--net/ipv4/ip_output.c1
-rw-r--r--net/ipv4/ipcomp.c8
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c12
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/protocol.c6
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c54
-rw-r--r--net/ipv4/sysctl_net_ipv4.c18
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_cong.c42
-rw-r--r--net/ipv4/tcp_input.c35
-rw-r--r--net/ipv4/tcp_ipv4.c27
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c8
-rw-r--r--net/ipv4/udp.c1
25 files changed, 448 insertions, 181 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 49ddca31c4d..e6e5d850633 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,21 +263,6 @@ void build_ehash_secret(void)
}
EXPORT_SYMBOL(build_ehash_secret);
-static inline int inet_netns_ok(struct net *net, __u8 protocol)
-{
- const struct net_protocol *ipprot;
-
- if (net_eq(net, &init_net))
- return 1;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot == NULL) {
- /* raw IP is OK */
- return 1;
- }
- return ipprot->netns_ok;
-}
-
/*
* Create an inet socket.
*/
@@ -350,10 +335,6 @@ lookup_protocol:
!ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
- err = -EAFNOSUPPORT;
- if (!inet_netns_ok(net, protocol))
- goto out_rcu_unlock;
-
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_no_check = answer->no_check;
@@ -1306,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
- SKB_GSO_SHARED_FRAG |
0)))
goto out;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a0d8392491c..a69b4e4a02b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err)
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, ihl);
__skb_pull(skb, ah_hlen + ihl);
- skb_set_transport_header(skb, -ihl);
+
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -ihl);
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -381,7 +385,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, ihl);
__skb_pull(skb, ah_hlen + ihl);
- skb_set_transport_header(skb, -ihl);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -ihl);
err = nexthdr;
@@ -413,9 +420,12 @@ static void ah4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+ atomic_inc(&flow_cache_genid);
+ rt_genid_bump(net);
+
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
- else
+ } else
ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9547a273b9e..ded146b217f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -928,24 +928,25 @@ static void parp_redo(struct sk_buff *skb)
static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct arphdr *arp;
+ const struct arphdr *arp;
+
+ if (dev->flags & IFF_NOARP ||
+ skb->pkt_type == PACKET_OTHERHOST ||
+ skb->pkt_type == PACKET_LOOPBACK)
+ goto freeskb;
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ goto out_of_mem;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull(skb, arp_hdr_len(dev)))
goto freeskb;
arp = arp_hdr(skb);
- if (arp->ar_hln != dev->addr_len ||
- dev->flags & IFF_NOARP ||
- skb->pkt_type == PACKET_OTHERHOST ||
- skb->pkt_type == PACKET_LOOPBACK ||
- arp->ar_pln != 4)
+ if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
goto freeskb;
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out_of_mem;
-
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 424fafbc8cb..b28e863fe0a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -85,3 +85,28 @@ out:
return err;
}
EXPORT_SYMBOL(ip4_datagram_connect);
+
+void ip4_datagram_release_cb(struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ip_options_rcu *inet_opt;
+ __be32 daddr = inet->inet_daddr;
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
+ return;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
+ inet->inet_saddr, inet->inet_dport,
+ inet->inet_sport, sk->sk_protocol,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ if (!IS_ERR(rt))
+ __sk_dst_set(sk, &rt->dst);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a8e4f2665d5..5281314886c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -63,6 +63,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include <net/addrconf.h>
#include "fib_lookup.h"
@@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_ADDRESS] = { .type = NLA_U32 },
[IFA_BROADCAST] = { .type = NLA_U32 },
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+ [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
+static void check_lifetime(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
+
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid)
{
@@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
inet_hash_insert(dev_net(in_dev->dev), ifa);
+ cancel_delayed_work(&check_lifetime_work);
+ schedule_delayed_work(&check_lifetime_work, 0);
+
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
listeners of netlink will know about new ifaddr */
@@ -573,7 +582,107 @@ errout:
return err;
}
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+#define INFINITY_LIFE_TIME 0xFFFFFFFF
+
+static void check_lifetime(struct work_struct *work)
+{
+ unsigned long now, next, next_sec, next_sched;
+ struct in_ifaddr *ifa;
+ struct hlist_node *node;
+ int i;
+
+ now = jiffies;
+ next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
+
+ rcu_read_lock();
+ for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+ hlist_for_each_entry_rcu(ifa, node,
+ &inet_addr_lst[i], hash) {
+ unsigned long age;
+
+ if (ifa->ifa_flags & IFA_F_PERMANENT)
+ continue;
+
+ /* We try to batch several events at once. */
+ age = (now - ifa->ifa_tstamp +
+ ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+
+ if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
+ age >= ifa->ifa_valid_lft) {
+ struct in_ifaddr **ifap ;
+
+ rtnl_lock();
+ for (ifap = &ifa->ifa_dev->ifa_list;
+ *ifap != NULL; ifap = &ifa->ifa_next) {
+ if (*ifap == ifa)
+ inet_del_ifa(ifa->ifa_dev,
+ ifap, 1);
+ }
+ rtnl_unlock();
+ } else if (ifa->ifa_preferred_lft ==
+ INFINITY_LIFE_TIME) {
+ continue;
+ } else if (age >= ifa->ifa_preferred_lft) {
+ if (time_before(ifa->ifa_tstamp +
+ ifa->ifa_valid_lft * HZ, next))
+ next = ifa->ifa_tstamp +
+ ifa->ifa_valid_lft * HZ;
+
+ if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
+ ifa->ifa_flags |= IFA_F_DEPRECATED;
+ rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
+ }
+ } else if (time_before(ifa->ifa_tstamp +
+ ifa->ifa_preferred_lft * HZ,
+ next)) {
+ next = ifa->ifa_tstamp +
+ ifa->ifa_preferred_lft * HZ;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ next_sec = round_jiffies_up(next);
+ next_sched = next;
+
+ /* If rounded timeout is accurate enough, accept it. */
+ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+ next_sched = next_sec;
+
+ now = jiffies;
+ /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+ if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
+ next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
+
+ schedule_delayed_work(&check_lifetime_work, next_sched - now);
+}
+
+static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
+ __u32 prefered_lft)
+{
+ unsigned long timeout;
+
+ ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+
+ timeout = addrconf_timeout_fixup(valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout))
+ ifa->ifa_valid_lft = timeout;
+ else
+ ifa->ifa_flags |= IFA_F_PERMANENT;
+
+ timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ ifa->ifa_flags |= IFA_F_DEPRECATED;
+ ifa->ifa_preferred_lft = timeout;
+ }
+ ifa->ifa_tstamp = jiffies;
+ if (!ifa->ifa_cstamp)
+ ifa->ifa_cstamp = ifa->ifa_tstamp;
+}
+
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
+ __u32 *pvalid_lft, __u32 *pprefered_lft)
{
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
@@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ if (tb[IFA_CACHEINFO]) {
+ struct ifa_cacheinfo *ci;
+
+ ci = nla_data(tb[IFA_CACHEINFO]);
+ if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ err = -EINVAL;
+ goto errout;
+ }
+ *pvalid_lft = ci->ifa_valid;
+ *pprefered_lft = ci->ifa_prefered;
+ }
+
return ifa;
errout:
return ERR_PTR(err);
}
+static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct in_ifaddr *ifa1, **ifap;
+
+ if (!ifa->ifa_local)
+ return NULL;
+
+ for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
+ ifap = &ifa1->ifa_next) {
+ if (ifa1->ifa_mask == ifa->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, ifa) &&
+ ifa1->ifa_local == ifa->ifa_local)
+ return ifa1;
+ }
+ return NULL;
+}
+
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
+ struct in_ifaddr *ifa_existing;
+ __u32 valid_lft = INFINITY_LIFE_TIME;
+ __u32 prefered_lft = INFINITY_LIFE_TIME;
ASSERT_RTNL();
- ifa = rtm_to_ifaddr(net, nlh);
+ ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+ ifa_existing = find_matching_ifa(ifa);
+ if (!ifa_existing) {
+ /* It would be best to check for !NLM_F_CREATE here but
+ * userspace alreay relies on not having to provide this.
+ */
+ set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+ } else {
+ inet_free_ifa(ifa);
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL ||
+ !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ return -EEXIST;
+
+ set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
+ }
+ return 0;
}
/*
@@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ifa->ifa_prefixlen = 32;
ifa->ifa_mask = inet_make_mask(32);
}
+ set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
ret = inet_set_ifa(dev, ifa);
break;
@@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
ifa->ifa_dev = in_dev;
ifa->ifa_scope = RT_SCOPE_HOST;
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
+ INFINITY_LIFE_TIME);
inet_insert_ifa(ifa);
}
}
@@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(IFNAMSIZ); /* IFA_LABEL */
}
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+ return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+ unsigned long tstamp, u32 preferred, u32 valid)
+{
+ struct ifa_cacheinfo ci;
+
+ ci.cstamp = cstamp_delta(cstamp);
+ ci.tstamp = cstamp_delta(tstamp);
+ ci.ifa_prefered = preferred;
+ ci.ifa_valid = valid;
+
+ return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
u32 portid, u32 seq, int event, unsigned int flags)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
+ u32 preferred, valid;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
+ ifm->ifa_flags = ifa->ifa_flags;
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+ if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+ preferred = ifa->ifa_preferred_lft;
+ valid = ifa->ifa_valid_lft;
+ if (preferred != INFINITY_LIFE_TIME) {
+ long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+
+ if (preferred > tval)
+ preferred -= tval;
+ else
+ preferred = 0;
+ if (valid != INFINITY_LIFE_TIME) {
+ if (valid > tval)
+ valid -= tval;
+ else
+ valid = 0;
+ }
+ }
+ } else {
+ preferred = INFINITY_LIFE_TIME;
+ valid = INFINITY_LIFE_TIME;
+ }
if ((ifa->ifa_address &&
nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
(ifa->ifa_local &&
@@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
(ifa->ifa_broadcast &&
nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
(ifa->ifa_label[0] &&
- nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+ nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+ put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+ preferred, valid))
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -1988,6 +2191,8 @@ void __init devinet_init(void)
register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
+ schedule_delayed_work(&check_lifetime_work, 0);
+
rtnl_af_register(&inet_af_ops);
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b61e9deb7c7..3b4f0cd2e63 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
pskb_trim(skb, skb->len - alen - padlen - 2);
__skb_pull(skb, hlen);
- skb_set_transport_header(skb, -ihl);
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ skb_reset_transport_header(skb);
+ else
+ skb_set_transport_header(skb, -ihl);
err = nexthdr[1];
@@ -499,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+ atomic_inc(&flow_cache_genid);
+ rt_genid_bump(net);
+
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
- else
+ } else
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4750d2b74d7..2e453bde699 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -73,8 +73,9 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_init_net(struct netns_frags *nf)
{
nf->nqueues = 0;
- atomic_set(&nf->mem, 0);
+ init_frag_mem_limit(nf);
INIT_LIST_HEAD(&nf->lru_list);
+ spin_lock_init(&nf->lru_lock);
}
EXPORT_SYMBOL(inet_frags_init_net);
@@ -91,6 +92,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
local_bh_disable();
inet_frag_evictor(nf, f, true);
local_bh_enable();
+
+ percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
@@ -98,9 +101,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
write_lock(&f->lock);
hlist_del(&fq->list);
- list_del(&fq->lru_list);
fq->net->nqueues--;
write_unlock(&f->lock);
+ inet_frag_lru_del(fq);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -117,12 +120,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
EXPORT_SYMBOL(inet_frag_kill);
static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
- struct sk_buff *skb, int *work)
+ struct sk_buff *skb)
{
- if (work)
- *work -= skb->truesize;
-
- atomic_sub(skb->truesize, &nf->mem);
if (f->skb_free)
f->skb_free(skb);
kfree_skb(skb);
@@ -133,6 +132,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
{
struct sk_buff *fp;
struct netns_frags *nf;
+ unsigned int sum, sum_truesize = 0;
WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -143,13 +143,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
while (fp) {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(nf, f, fp, work);
+ sum_truesize += fp->truesize;
+ frag_kfree_skb(nf, f, fp);
fp = xp;
}
-
+ sum = sum_truesize + f->qsize;
if (work)
- *work -= f->qsize;
- atomic_sub(f->qsize, &nf->mem);
+ *work -= sum;
+ sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
@@ -164,22 +165,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
int work, evicted = 0;
if (!force) {
- if (atomic_read(&nf->mem) <= nf->high_thresh)
+ if (frag_mem_limit(nf) <= nf->high_thresh)
return 0;
}
- work = atomic_read(&nf->mem) - nf->low_thresh;
+ work = frag_mem_limit(nf) - nf->low_thresh;
while (work > 0) {
- read_lock(&f->lock);
+ spin_lock(&nf->lru_lock);
+
if (list_empty(&nf->lru_list)) {
- read_unlock(&f->lock);
+ spin_unlock(&nf->lru_lock);
break;
}
q = list_first_entry(&nf->lru_list,
struct inet_frag_queue, lru_list);
atomic_inc(&q->refcnt);
- read_unlock(&f->lock);
+ spin_unlock(&nf->lru_lock);
spin_lock(&q->lock);
if (!(q->last_in & INET_FRAG_COMPLETE))
@@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &f->hash[hash]);
- list_add_tail(&qp->lru_list, &nf->lru_list);
nf->nqueues++;
write_unlock(&f->lock);
+ inet_frag_lru_add(nf, qp);
return qp;
}
@@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
q->net = nf;
f->constructor(q, arg);
- atomic_add(f->qsize, &nf->mem);
+ add_frag_mem_limit(q, f->qsize);
+
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f55a4e61bfb..1211613c6c3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net)
int ip_frag_mem(struct net *net)
{
- return atomic_read(&net->ipv4.frags.mem);
+ return sum_frag_mem_limit(&net->ipv4.frags);
}
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
qp->user == arg->user;
}
-/* Memory Tracking Functions. */
-static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
-{
- atomic_sub(skb->truesize, &nf->mem);
- kfree_skb(skb);
-}
-
static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
@@ -340,6 +333,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
static int ip_frag_reinit(struct ipq *qp)
{
struct sk_buff *fp;
+ unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
atomic_inc(&qp->q.refcnt);
@@ -349,9 +343,12 @@ static int ip_frag_reinit(struct ipq *qp)
fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(qp->q.net, fp);
+
+ sum_truesize += fp->truesize;
+ kfree_skb(fp);
fp = xp;
} while (fp);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
qp->q.last_in = 0;
qp->q.len = 0;
@@ -496,7 +493,8 @@ found:
qp->q.fragments = next;
qp->q.meat -= free_it->len;
- frag_kfree_skb(qp->q.net, free_it);
+ sub_frag_mem_limit(&qp->q, free_it->truesize);
+ kfree_skb(free_it);
}
}
@@ -519,7 +517,7 @@ found:
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- atomic_add(skb->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -531,9 +529,7 @@ found:
qp->q.meat == qp->q.len)
return ip_frag_reasm(qp, prev, dev);
- write_lock(&ip4_frags.lock);
- list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
- write_unlock(&ip4_frags.lock);
+ inet_frag_lru_move(&qp->q);
return -EINPROGRESS;
err:
@@ -617,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, clone->truesize);
}
skb_push(head, head->data - skb_network_header(head));
@@ -645,7 +641,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
- atomic_sub(sum_truesize, &qp->q.net->mem);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
head->next = NULL;
head->dev = dev;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index af6be70821c..00a14b9864e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -820,8 +820,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
ttl = tiph->ttl;
tos = tiph->tos;
- if (tos == 1) {
- tos = 0;
+ if (tos & 0x1) {
+ tos &= ~0x1;
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
else if (skb->protocol == htons(ETH_P_IPV6))
@@ -965,8 +965,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
ptr--;
}
if (tunnel->parms.o_flags&GRE_CSUM) {
+ int offset = skb_transport_offset(skb);
+
*ptr = 0;
- *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
+ skb->len - offset,
+ 0));
}
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f1395a6fb35..87abd3e2bd3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -208,13 +208,6 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
if (ipprot != NULL) {
int ret;
- if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
- net_info_ratelimited("%s: proto %d isn't netns-ready\n",
- __func__, protocol);
- kfree_skb(skb);
- goto out;
- }
-
if (!ipprot->no_policy) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
kfree_skb(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e98ed2bff5..5e12dca7b3d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -598,6 +598,7 @@ slow_path:
/* for offloaded checksums cleanup checksum before fragmentation */
if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
goto fail;
+ iph = ip_hdr(skb);
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d3ab47e19a8..f01d1b1aff7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+ atomic_inc(&flow_cache_genid);
+ rt_genid_bump(net);
+
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
- else
+ } else
ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x);
}
@@ -160,6 +163,7 @@ static const struct net_protocol ipcomp4_protocol = {
.handler = xfrm4_rcv,
.err_handler = ipcomp4_err,
.no_policy = 1,
+ .netns_ok = 1,
};
static int __init ipcomp4_init(void)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5ef3cba225..7d168dcbd13 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -88,10 +88,8 @@ static void ulog_send(unsigned int nlgroupnum)
{
ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
- if (timer_pending(&ub->timer)) {
- pr_debug("ulog_send: timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ pr_debug("ulog_send: timer is deleting\n");
+ del_timer(&ub->timer);
if (!ub->skb) {
pr_debug("ulog_send: nothing to send\n");
@@ -426,10 +424,8 @@ static void __exit ulog_tg_exit(void)
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i];
- if (timer_pending(&ub->timer)) {
- pr_debug("timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ pr_debug("timer is deleting\n");
+ del_timer(&ub->timer);
if (ub->skb) {
kfree_skb(ub->skb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8f3d05424a3..6f9c07268cf 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -738,6 +738,7 @@ struct proto ping_prot = {
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = ping_v4_hash,
.unhash = ping_v4_unhash,
.get_port = ping_v4_get_port,
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 0f9d09f54bd..ce848461acb 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -37,6 +37,12 @@ const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
+ if (!prot->netns_ok) {
+ pr_err("Protocol %u is not namespace aware, cannot register.\n",
+ protocol);
+ return -EINVAL;
+ }
+
return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
NULL, prot) ? 0 : -1;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 73d1e4df4bf..6f08991409c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -894,6 +894,7 @@ struct proto raw_prot = {
.recvmsg = raw_recvmsg,
.bind = raw_bind,
.backlog_rcv = raw_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw_sock),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 844a9ef60db..a0fcc47fee7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ if (dst_metric_locked(dst, RTAX_MTU))
+ return;
+
if (dst->dev->mtu < mtu)
return;
@@ -962,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
-void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
@@ -975,6 +978,53 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
ip_rt_put(rt);
}
}
+
+void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ struct dst_entry *dst;
+ bool new = false;
+
+ bh_lock_sock(sk);
+ rt = (struct rtable *) __sk_dst_get(sk);
+
+ if (sock_owned_by_user(sk) || !rt) {
+ __ipv4_sk_update_pmtu(skb, sk, mtu);
+ goto out;
+ }
+
+ __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+ if (!__sk_dst_check(sk, 0)) {
+ rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+ if (IS_ERR(rt))
+ goto out;
+
+ new = true;
+ }
+
+ __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+
+ dst = dst_check(&rt->dst, 0);
+ if (!dst) {
+ if (new)
+ dst_release(&rt->dst);
+
+ rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+ if (IS_ERR(rt))
+ goto out;
+
+ new = true;
+ }
+
+ if (new)
+ __sk_dst_set(sk, &rt->dst);
+
+out:
+ bh_unlock_sock(sk);
+}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
@@ -1120,7 +1170,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (!mtu || time_after_eq(jiffies, rt->dst.expires))
mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu && rt_is_output_route(rt))
+ if (mtu)
return mtu;
mtu = dst->dev->mtu;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a25e1d286b9..960fd29d9b8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -27,6 +27,7 @@
#include <net/tcp_memcontrol.h>
static int zero;
+static int one = 1;
static int two = 2;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
@@ -549,14 +550,16 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "tcp_app_win",
@@ -630,13 +633,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_congestion_control,
},
{
- .procname = "tcp_abc",
- .data = &sysctl_tcp_abc,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "tcp_mtu_probing",
.data = &sysctl_tcp_mtu_probing,
.maxlen = sizeof(int),
@@ -779,7 +775,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_udp_rmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &one
},
{
.procname = "udp_wmem_min",
@@ -787,7 +783,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_udp_wmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &one
},
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ec1f69c5ce..1f0bedb8622 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
tcp_enable_early_retrans(tp);
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ tp->tsoffset = 0;
+
sk->sk_state = TCP_CLOSE;
sk->sk_write_space = sk_stream_write_space;
@@ -895,8 +897,7 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
-
- skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -2289,7 +2290,6 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
- tp->bytes_acked = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
@@ -2713,6 +2713,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else
err = -EINVAL;
break;
+ case TCP_TIMESTAMP:
+ if (!tp->repair)
+ err = -EPERM;
+ else
+ tp->tsoffset = val - tcp_time_stamp;
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -2961,6 +2967,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_USER_TIMEOUT:
val = jiffies_to_msecs(icsk->icsk_user_timeout);
break;
+ case TCP_TIMESTAMP:
+ val = tcp_time_stamp + tp->tsoffset;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -3034,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
- SKB_GSO_SHARED_FRAG |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 291f2ed7cc3..019c2389a34 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -310,35 +310,24 @@ void tcp_slow_start(struct tcp_sock *tp)
{
int cnt; /* increase in packets */
unsigned int delta = 0;
+ u32 snd_cwnd = tp->snd_cwnd;
- /* RFC3465: ABC Slow start
- * Increase only after a full MSS of bytes is acked
- *
- * TCP sender SHOULD increase cwnd by the number of
- * previously unacknowledged bytes ACKed by each incoming
- * acknowledgment, provided the increase is not more than L
- */
- if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
- return;
+ if (unlikely(!snd_cwnd)) {
+ pr_err_once("snd_cwnd is nul, please report this bug.\n");
+ snd_cwnd = 1U;
+ }
if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */
else
- cnt = tp->snd_cwnd; /* exponential increase */
-
- /* RFC3465: ABC
- * We MAY increase by 2 if discovered delayed ack
- */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
- cnt <<= 1;
- tp->bytes_acked = 0;
+ cnt = snd_cwnd; /* exponential increase */
tp->snd_cwnd_cnt += cnt;
- while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd_cnt -= tp->snd_cwnd;
+ while (tp->snd_cwnd_cnt >= snd_cwnd) {
+ tp->snd_cwnd_cnt -= snd_cwnd;
delta++;
}
- tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp);
+ tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp);
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
@@ -372,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
/* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
-
/* In dangerous area, increase slowly. */
- else if (sysctl_tcp_abc) {
- /* RFC3465: Appropriate Byte Count
- * increase once for each full cwnd acked
- */
- if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
- tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
- } else {
+ else
tcp_cong_avoid_ai(tp, tp->snd_cwnd);
- }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 492c7cfe145..a759e19496d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_thin_dupack __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_abc __read_mostly;
int sysctl_tcp_early_retrans __read_mostly = 2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
@@ -1240,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
if (!skb_shinfo(prev)->gso_size) {
skb_shinfo(prev)->gso_size = mss;
- skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
+ skb_shinfo(prev)->gso_type = sk->sk_gso_type;
}
/* CHECKME: To clear or not to clear? Mimics normal skb currently */
if (skb_shinfo(skb)->gso_segs <= 1) {
skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
+ skb_shinfo(skb)->gso_type = 0;
}
/* Difference in this won't matter, both ACKed by the same cumul. ACK */
@@ -2007,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->frto_counter = 0;
- tp->bytes_acked = 0;
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
@@ -2056,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
- tp->bytes_acked = 0;
tcp_clear_retrans_partial(tp);
if (tcp_is_reno(tp))
@@ -2684,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
struct tcp_sock *tp = tcp_sk(sk);
tp->high_seq = tp->snd_nxt;
- tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tp->prior_cwnd = tp->snd_cwnd;
tp->prr_delivered = 0;
@@ -2735,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
- tp->bytes_acked = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = 0;
tcp_init_cwnd_reduction(sk, set_ssthresh);
@@ -3417,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
{
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tp->snd_cwnd_cnt = 0;
- tp->bytes_acked = 0;
TCP_ECN_queue_cwr(tp);
tcp_moderate_cwnd(tp);
}
@@ -3502,6 +3496,11 @@ static bool tcp_process_frto(struct sock *sk, int flag)
}
} else {
if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+ if (!tcp_packets_in_flight(tp)) {
+ tcp_enter_frto_loss(sk, 2, flag);
+ return true;
+ }
+
/* Prevent sending of new data. */
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp));
@@ -3608,15 +3607,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, prior_snd_una))
flag |= FLAG_SND_UNA_ADVANCED;
- if (sysctl_tcp_abc) {
- if (icsk->icsk_ca_state < TCP_CA_CWR)
- tp->bytes_acked += ack - prior_snd_una;
- else if (icsk->icsk_ca_state == TCP_CA_Loss)
- /* we assume just one segment left network */
- tp->bytes_acked += min(ack - prior_snd_una,
- tp->mss_cache);
- }
-
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
@@ -3870,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
++ptr;
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
- tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+ tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
return true;
}
return false;
@@ -3894,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
if (tcp_parse_aligned_timestamp(tp, th))
return true;
}
+
tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+ if (tp->rx_opt.saw_tstamp)
+ tp->rx_opt.rcv_tsecr -= tp->tsoffset;
+
return true;
}
@@ -5647,8 +5641,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
* the remote receives only the retransmitted (regular) SYNs: either
* the original SYN-data or the corresponding SYN-ACK is lost.
*/
- syn_drop = (cookie->len <= 0 && data &&
- inet_csk(sk)->icsk_retransmits);
+ syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
@@ -5676,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
+ if (tp->rx_opt.saw_tstamp)
+ tp->rx_opt.rcv_tsecr -= tp->tsoffset;
if (th->ack) {
/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bbbdcc5c197..77f5050efc8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
* We do take care of PMTU discovery (RFC1191) special case :
* we can receive locally generated ICMP messages while socket is held.
*/
- if (sock_owned_by_user(sk) &&
- type != ICMP_DEST_UNREACH &&
- code != ICMP_FRAG_NEEDED)
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
+ if (sock_owned_by_user(sk)) {
+ if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ }
if (sk->sk_state == TCP_CLOSE)
goto out;
@@ -497,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
* errors returned from accept().
*/
inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
goto out;
case TCP_SYN_SENT:
@@ -726,7 +726,7 @@ release_sk1:
*/
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 ts, int oif,
+ u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
{
@@ -747,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
- if (ts) {
+ if (tsecr) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
- rep.opt[1] = htonl(tcp_time_stamp);
- rep.opt[2] = htonl(ts);
+ rep.opt[1] = htonl(tsval);
+ rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -767,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
#ifdef CONFIG_TCP_MD5SIG
if (key) {
- int offset = (ts) ? 3 : 0;
+ int offset = (tsecr) ? 3 : 0;
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -802,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
@@ -821,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_time_stamp,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1502,8 +1504,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
+ }
req = inet_reqsk_alloc(&tcp_request_sock_ops);
if (!req)
@@ -1668,6 +1672,7 @@ drop_and_release:
drop_and_free:
reqsk_free(req);
drop:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f35f2dfb640..b83a49cc381 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) {
+ tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
tmp_opt.ts_recent = tcptw->tw_ts_recent;
tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
@@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_rcv_wnd = tcp_receive_window(tp);
tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+ tcptw->tw_ts_offset = tp->tsoffset;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
@@ -446,7 +448,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- newtp->bytes_acked = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
@@ -500,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
+ newtp->tsoffset = 0;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
if (newtp->af_specific->md5_lookup(sk, newsk))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 367e2ec01da..6182d90e97b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
opts->options |= OPTION_TS;
- opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = tcb ? tcb->when : 0;
+ opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
- skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
if (skb->len <= mss_now || !sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
@@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
*/
skb_shinfo(skb)->gso_segs = 1;
skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
} else {
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
skb_shinfo(skb)->gso_size = mss_now;
- skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
+ skb_shinfo(skb)->gso_type = sk->sk_gso_type;
}
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0610e4b515..6791aac06ea 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1977,6 +1977,7 @@ struct proto udp_prot = {
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
.backlog_rcv = __udp_queue_rcv_skb,
+ .release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,