summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c36
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_fragment.c283
-rw-r--r--net/ipv4/ip_fragment.c56
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_vti.c54
-rw-r--r--net/ipv4/proc.c5
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/route.c32
-rw-r--r--net/ipv4/tcp_metrics.c1
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv4/xfrm4_protocol.c2
14 files changed, 299 insertions, 185 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e9449376b58..214882e7d6d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -180,11 +180,12 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
int destroy);
#ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *idev);
+static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
{
+ return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
@@ -232,6 +233,7 @@ EXPORT_SYMBOL(in_dev_finish_destroy);
static struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
+ int err = -ENOMEM;
ASSERT_RTNL();
@@ -252,7 +254,13 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* Account for reference dev->ip_ptr (below) */
in_dev_hold(in_dev);
- devinet_sysctl_register(in_dev);
+ err = devinet_sysctl_register(in_dev);
+ if (err) {
+ in_dev->dead = 1;
+ in_dev_put(in_dev);
+ in_dev = NULL;
+ goto out;
+ }
ip_mc_init_dev(in_dev);
if (dev->flags & IFF_UP)
ip_mc_up(in_dev);
@@ -260,7 +268,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* we can receive as soon as ip_ptr is set -- do this last */
rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
- return in_dev;
+ return in_dev ?: ERR_PTR(err);
out_kfree:
kfree(in_dev);
in_dev = NULL;
@@ -1347,8 +1355,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!in_dev) {
if (event == NETDEV_REGISTER) {
in_dev = inetdev_init(dev);
- if (!in_dev)
- return notifier_from_errno(-ENOMEM);
+ if (IS_ERR(in_dev))
+ return notifier_from_errno(PTR_ERR(in_dev));
if (dev->flags & IFF_LOOPBACK) {
IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
@@ -2182,11 +2190,21 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
kfree(t);
}
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
{
- neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
- __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ int err;
+
+ if (!sysctl_dev_name_is_allowed(idev->dev->name))
+ return -EINVAL;
+
+ err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
+ if (err)
+ return err;
+ err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
&idev->cnf);
+ if (err)
+ neigh_sysctl_unregister(idev->arp_parms);
+ return err;
}
static void devinet_sysctl_unregister(struct in_device *idev)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f0bdd47bbbc..6556263c8fa 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -74,7 +74,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
- if (!segs || IS_ERR(segs)) {
+ if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
goto out;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index db710b059ba..f10eab46228 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1321,7 +1321,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
atomic_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
- setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
+ setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
im->unsolicit_count = IGMP_Unsolicited_Report_Count;
#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3b01959bf4b..62b1f73749d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+#define INETFRAGS_EVICT_BUCKETS 128
+#define INETFRAGS_EVICT_MAX 512
+
+/* don't rebuild inetfrag table with new secret more often than this */
+#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static void inet_frag_secret_rebuild(unsigned long dummy)
+static unsigned int
+inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+{
+ return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+}
+
+static bool inet_frag_may_rebuild(struct inet_frags *f)
+{
+ return time_after(jiffies,
+ f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+}
+
+static void inet_frag_secret_rebuild(struct inet_frags *f)
{
- struct inet_frags *f = (struct inet_frags *)dummy;
- unsigned long now = jiffies;
int i;
- /* Per bucket lock NOT needed here, due to write lock protection */
- write_lock(&f->lock);
+ write_seqlock_bh(&f->rnd_seqlock);
+
+ if (!inet_frag_may_rebuild(f))
+ goto out;
get_random_bytes(&f->rnd, sizeof(u32));
+
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
struct hlist_node *n;
hb = &f->hash[i];
+ spin_lock(&hb->chain_lock);
+
hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = f->hashfn(q);
+ unsigned int hval = inet_frag_hashfn(f, q);
if (hval != i) {
struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,195 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
/* Relink to new hash chain. */
hb_dest = &f->hash[hval];
+
+ /* This is the only place where we take
+ * another chain_lock while already holding
+ * one. As this will not run concurrently,
+ * we cannot deadlock on hb_dest lock below, if its
+ * already locked it will be released soon since
+ * other caller cannot be waiting for hb lock
+ * that we've taken above.
+ */
+ spin_lock_nested(&hb_dest->chain_lock,
+ SINGLE_DEPTH_NESTING);
hlist_add_head(&q->list, &hb_dest->chain);
+ spin_unlock(&hb_dest->chain_lock);
}
}
+ spin_unlock(&hb->chain_lock);
}
- write_unlock(&f->lock);
- mod_timer(&f->secret_timer, now + f->secret_interval);
+ f->rebuild = false;
+ f->last_rebuild_jiffies = jiffies;
+out:
+ write_sequnlock_bh(&f->rnd_seqlock);
+}
+
+static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+{
+ return q->net->low_thresh == 0 ||
+ frag_mem_limit(q->net) >= q->net->low_thresh;
+}
+
+static unsigned int
+inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+{
+ struct inet_frag_queue *fq;
+ struct hlist_node *n;
+ unsigned int evicted = 0;
+ HLIST_HEAD(expired);
+
+evict_again:
+ spin_lock(&hb->chain_lock);
+
+ hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+ if (!inet_fragq_should_evict(fq))
+ continue;
+
+ if (!del_timer(&fq->timer)) {
+ /* q expiring right now thus increment its refcount so
+ * it won't be freed under us and wait until the timer
+ * has finished executing then destroy it
+ */
+ atomic_inc(&fq->refcnt);
+ spin_unlock(&hb->chain_lock);
+ del_timer_sync(&fq->timer);
+ WARN_ON(atomic_read(&fq->refcnt) != 1);
+ inet_frag_put(fq, f);
+ goto evict_again;
+ }
+
+ /* suppress xmit of (icmp) error packet */
+ fq->last_in &= ~INET_FRAG_FIRST_IN;
+ fq->last_in |= INET_FRAG_EVICTED;
+ hlist_del(&fq->list);
+ hlist_add_head(&fq->list, &expired);
+ ++evicted;
+ }
+
+ spin_unlock(&hb->chain_lock);
+
+ hlist_for_each_entry_safe(fq, n, &expired, list)
+ f->frag_expire((unsigned long) fq);
+
+ return evicted;
+}
+
+static void inet_frag_worker(struct work_struct *work)
+{
+ unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+ unsigned int i, evicted = 0;
+ struct inet_frags *f;
+
+ f = container_of(work, struct inet_frags, frags_work);
+
+ BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+
+ local_bh_disable();
+
+ for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ evicted += inet_evict_bucket(f, &f->hash[i]);
+ i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+ if (evicted > INETFRAGS_EVICT_MAX)
+ break;
+ }
+
+ f->next_bucket = i;
+
+ local_bh_enable();
+
+ if (f->rebuild && inet_frag_may_rebuild(f))
+ inet_frag_secret_rebuild(f);
+}
+
+static void inet_frag_schedule_worker(struct inet_frags *f)
+{
+ if (unlikely(!work_pending(&f->frags_work)))
+ schedule_work(&f->frags_work);
}
void inet_frags_init(struct inet_frags *f)
{
int i;
+ INIT_WORK(&f->frags_work, inet_frag_worker);
+
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct inet_frag_bucket *hb = &f->hash[i];
spin_lock_init(&hb->chain_lock);
INIT_HLIST_HEAD(&hb->chain);
}
- rwlock_init(&f->lock);
- setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
- (unsigned long)f);
- f->secret_timer.expires = jiffies + f->secret_interval;
- add_timer(&f->secret_timer);
+ seqlock_init(&f->rnd_seqlock);
+ f->last_rebuild_jiffies = 0;
}
EXPORT_SYMBOL(inet_frags_init);
void inet_frags_init_net(struct netns_frags *nf)
{
- nf->nqueues = 0;
init_frag_mem_limit(nf);
- INIT_LIST_HEAD(&nf->lru_list);
- spin_lock_init(&nf->lru_lock);
}
EXPORT_SYMBOL(inet_frags_init_net);
void inet_frags_fini(struct inet_frags *f)
{
- del_timer(&f->secret_timer);
+ cancel_work_sync(&f->frags_work);
}
EXPORT_SYMBOL(inet_frags_fini);
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
- nf->low_thresh = 0;
+ unsigned int seq;
+ int i;
+ nf->low_thresh = 0;
local_bh_disable();
- inet_frag_evictor(nf, f, true);
+
+evict_again:
+ seq = read_seqbegin(&f->rnd_seqlock);
+
+ for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+ inet_evict_bucket(f, &f->hash[i]);
+
+ if (read_seqretry(&f->rnd_seqlock, seq))
+ goto evict_again;
+
local_bh_enable();
percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
{
struct inet_frag_bucket *hb;
- unsigned int hash;
+ unsigned int seq, hash;
+
+ restart:
+ seq = read_seqbegin(&f->rnd_seqlock);
- read_lock(&f->lock);
- hash = f->hashfn(fq);
+ hash = inet_frag_hashfn(f, fq);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
+ if (read_seqretry(&f->rnd_seqlock, seq)) {
+ spin_unlock(&hb->chain_lock);
+ goto restart;
+ }
+
+ return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ struct inet_frag_bucket *hb;
+
+ hb = get_frag_bucket_locked(fq, f);
hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
-
- read_unlock(&f->lock);
- inet_frag_lru_del(fq);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -165,8 +305,7 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
kfree_skb(skb);
}
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
- int *work)
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
{
struct sk_buff *fp;
struct netns_frags *nf;
@@ -186,86 +325,30 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
fp = xp;
}
sum = sum_truesize + f->qsize;
- if (work)
- *work -= sum;
sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
kfree(q);
-
}
EXPORT_SYMBOL(inet_frag_destroy);
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
-{
- struct inet_frag_queue *q;
- int work, evicted = 0;
-
- if (!force) {
- if (frag_mem_limit(nf) <= nf->high_thresh)
- return 0;
- }
-
- work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0 || force) {
- spin_lock(&nf->lru_lock);
-
- if (list_empty(&nf->lru_list)) {
- spin_unlock(&nf->lru_lock);
- break;
- }
-
- q = list_first_entry(&nf->lru_list,
- struct inet_frag_queue, lru_list);
- atomic_inc(&q->refcnt);
- /* Remove q from list to avoid several CPUs grabbing it */
- list_del_init(&q->lru_list);
-
- spin_unlock(&nf->lru_lock);
-
- spin_lock(&q->lock);
- if (!(q->last_in & INET_FRAG_COMPLETE))
- inet_frag_kill(q, f);
- spin_unlock(&q->lock);
-
- if (atomic_dec_and_test(&q->refcnt))
- inet_frag_destroy(q, f, &work);
- evicted++;
- }
-
- return evicted;
-}
-EXPORT_SYMBOL(inet_frag_evictor);
-
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
- struct inet_frag_bucket *hb;
+ struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp;
- unsigned int hash;
-
- read_lock(&f->lock); /* Protects against hash rebuild */
- /*
- * While we stayed w/o the lock other CPU could update
- * the rnd seed, so we need to re-calculate the hash
- * chain. Fortunatelly the qp_in can be used to get one.
- */
- hash = f->hashfn(qp_in);
- hb = &f->hash[hash];
- spin_lock(&hb->chain_lock);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * released the hash bucket lock.
+ * such entry could have been created on other cpu before
+ * we acquired hash bucket lock.
*/
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
@@ -278,9 +361,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
- inet_frag_lru_add(nf, qp);
+
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return qp;
}
@@ -290,6 +372,11 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
{
struct inet_frag_queue *q;
+ if (frag_mem_limit(nf) > nf->high_thresh) {
+ inet_frag_schedule_worker(f);
+ return NULL;
+ }
+
q = kzalloc(f->qsize, GFP_ATOMIC);
if (q == NULL)
return NULL;
@@ -301,7 +388,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
- INIT_LIST_HEAD(&q->lru_list);
return q;
}
@@ -320,12 +406,15 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock)
{
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
int depth = 0;
+ if (frag_mem_limit(nf) > nf->low_thresh)
+ inet_frag_schedule_worker(f);
+
+ hash &= (INETFRAGS_HASHSZ - 1);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
@@ -333,18 +422,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return q;
}
depth++;
}
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
return inet_frag_create(nf, f, key);
- else
- return ERR_PTR(-ENOBUFS);
+
+ if (inet_frag_may_rebuild(f)) {
+ if (!f->rebuild)
+ f->rebuild = true;
+ inet_frag_schedule_worker(f);
+ }
+
+ return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307..634fc31aa24 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -86,11 +86,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_nqueues(struct net *net)
-{
- return net->ipv4.frags.nqueues;
-}
-
int ip_frag_mem(struct net *net)
{
return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +104,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
+ ip4_frags.rnd);
}
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
+static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
{
- struct ipq *ipq;
+ const struct ipq *ipq;
ipq = container_of(q, struct ipq, q);
return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
+static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct ipq *qp;
- struct ip4_create_arg *arg = a;
+ const struct ipq *qp;
+ const struct ip4_create_arg *arg = a;
qp = container_of(q, struct ipq, q);
return qp->id == arg->iph->id &&
@@ -133,14 +128,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
qp->user == arg->user;
}
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- struct ip4_create_arg *arg = a;
+ const struct ip4_create_arg *arg = a;
qp->protocol = arg->iph->protocol;
qp->id = arg->iph->id;
@@ -177,18 +172,6 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags);
}
-/* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
-static void ip_evictor(struct net *net)
-{
- int evicted;
-
- evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
- if (evicted)
- IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
@@ -207,7 +190,8 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+ if (!(qp->q.last_in & INET_FRAG_EVICTED))
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
@@ -260,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
arg.iph = iph;
arg.user = user;
- read_lock(&ip4_frags.lock);
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -505,7 +488,6 @@ found:
}
skb_dst_drop(skb);
- inet_frag_lru_move(&qp->q);
return -EINPROGRESS;
err:
@@ -655,9 +637,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
- /* Start by cleaning up the memory. */
- ip_evictor(net);
-
/* Lookup (or create) queue header */
if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
int ret;
@@ -721,14 +700,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.data = &init_net.ipv4.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv4.frags.high_thresh
},
{
.procname = "ipfrag_time",
@@ -740,10 +722,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
{ }
};
+/* secret interval has been deprecated */
+static int ip4_frags_secret_interval_unused;
static struct ctl_table ip4_frags_ctl_table[] = {
{
.procname = "ipfrag_secret_interval",
- .data = &ip4_frags.secret_interval,
+ .data = &ip4_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -771,7 +755,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
goto err_alloc;
table[0].data = &net->ipv4.frags.high_thresh;
+ table[0].extra1 = &net->ipv4.frags.low_thresh;
+ table[0].extra2 = &init_net.ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
+ table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
/* Don't export sysctls to unprivileged users */
@@ -873,6 +860,5 @@ void __init ipfrag_init(void)
ip4_frags.qsize = sizeof(struct ipq);
ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
- ip4_frags.secret_interval = 10 * 60 * HZ;
inet_frags_init(&ip4_frags);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64741b93863..5cb830c7899 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1319,7 +1319,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control = optval;
+ msg.msg_control = (__force void *) optval;
msg.msg_controllen = len;
msg.msg_flags = flags;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index b8960f3527f..e453cb724a9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -534,40 +534,28 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
static int __init vti_init(void)
{
+ const char *msg;
int err;
- pr_info("IPv4 over IPSec tunneling driver\n");
+ pr_info("IPv4 over IPsec tunneling driver\n");
+ msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
- return err;
- err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
- if (err < 0) {
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
+ goto pernet_dev_failed;
+ msg = "tunnel protocols";
+ err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
+ if (err < 0)
+ goto xfrm_proto_esp_failed;
err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
- if (err < 0) {
- xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
-
+ if (err < 0)
+ goto xfrm_proto_ah_failed;
err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
- if (err < 0) {
- xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
- xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
+ if (err < 0)
+ goto xfrm_proto_comp_failed;
+ msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -576,23 +564,23 @@ static int __init vti_init(void)
rtnl_link_failed:
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
+pernet_dev_failed:
+ pr_err("vti init: failed to register %s\n", msg);
return err;
}
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
- pr_info("vti close: can't deregister tunnel\n");
- if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
- pr_info("vti close: can't deregister tunnel\n");
- if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
- pr_info("vti close: can't deregister tunnel\n");
-
-
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae0af9386f7..8e3eb39f84e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -52,6 +52,7 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ unsigned int frag_mem;
int orphans, sockets;
local_bh_disable();
@@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- seq_printf(seq, "FRAG: inuse %d memory %d\n",
- ip_frag_nqueues(net), ip_frag_mem(net));
+ frag_mem = ip_frag_mem(net);
+ seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
return 0;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2054d7136c6..739db3100c2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -58,6 +58,7 @@
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
+#include <linux/igmp.h>
#include <net/net_namespace.h>
#include <net/dst.h>
#include <net/sock.h>
@@ -174,7 +175,9 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
while (sk) {
delivered = 1;
- if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
+ if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
+ ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
+ skb->dev->ifindex)) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3162ea923de..190199851c9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -457,8 +457,31 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-atomic_t *ip_idents __read_mostly;
-EXPORT_SYMBOL(ip_idents);
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+ atomic_t id;
+ u32 stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
+ */
+u32 ip_idents_reserve(u32 hash, int segs)
+{
+ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 now = (u32)jiffies;
+ u32 delta = 0;
+
+ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ delta = prandom_u32_max(now - old);
+
+ return atomic_add_return(segs + delta, &bucket->id) - segs;
+}
+EXPORT_SYMBOL(ip_idents_reserve);
void __ip_select_ident(struct iphdr *iph, int segs)
{
@@ -467,7 +490,10 @@ void __ip_select_ident(struct iphdr *iph, int segs)
net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
- hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+ hash = jhash_3words((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+ iph->protocol,
+ ip_idents_hashrnd);
id = ip_idents_reserve(hash, segs);
iph->id = htons(id);
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4fe04180598..0d54e59b9ea 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1093,7 +1093,6 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
.doit = tcp_metrics_nl_cmd_get,
.dumpit = tcp_metrics_nl_dump,
.policy = tcp_metrics_nl_policy,
- .flags = GENL_ADMIN_PERM,
},
{
.cmd = TCP_METRICS_CMD_DEL,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f31053b90ee..f57c0e4c232 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1567,7 +1567,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
goto drop;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4807544d018..59035bc3008 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -79,7 +79,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
- if (!segs || IS_ERR(segs)) {
+ if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
goto out;
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index a2ce0101eaa..dccefa9d84c 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -124,7 +124,7 @@ static int xfrm4_ah_rcv(struct sk_buff *skb)
for_each_protocol_rcu(ah4_handlers, handler)
if ((ret = handler->handler(skb)) != -EINVAL)
- return ret;;
+ return ret;
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);